---
title: "Narratives of Backlash? Perceptions of Changing Status Hierarchies in Open-Ended Survey Responses"
subtitle: "Replication file"
author: "Magdalena Breyer, Tabea Palmtag, Delia Zollinger"
date: "`r Sys.Date()`"
output: 
  html_document:
    toc: true
    code_folding: hide
---

Packages


```{r setup, include=TRUE, message = F}
# Chunk defaults for the whole document: show code, hide warnings and messages.
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)

# packages
# Check for pacman with requireNamespace() (require() is meant for loading,
# not for testing availability) and install it if it is missing.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
# Keep pacman attached, as the original require() call did when it succeeded.
library(pacman)

# Load all analysis packages; the comments record the versions used.
pacman::p_load(
  ggpp,                # CRAN v0.5.8-1
  ggthemes,            # CRAN v5.1.0
  gridExtra,           # CRAN v2.3
  kableExtra,          # CRAN v1.4.0
  marginaleffects,     # CRAN 0.28.0
  modelsummary,        # CRAN 2.3.0
  quanteda,            # CRAN v4.3.0
  quanteda.textmodels, # CRAN v0.9.10
  quanteda.textplots,  # CRAN v0.95
  quanteda.textstats,  # CRAN v0.97.2
  tidyverse            # CRAN v2.0.0
)


# Default ggplot2 theme for every figure in this document.
theme_set(theme_minimal())
```


# Data

```{r}

# Read the prepared data set that all later chunks work on.
df_p <- readRDS(file = "../data/df_pfinal.RDS")


```


```{r}

# Terms to delete from open answers
# These are removed in the pre-processing chunks together with the German
# stop-word list. Entries containing "_" can only match tokens that were
# compounded beforehand.

delete <- c(
  "`", "30",
  "ahnung", "allgemein", "anerkannt", "anerkennung",
  "befinden", "beispielsweise", "bekommen", "bereiche", "besser", "bzw",
  "dabei", "dafür", "darunter", "davon", "denen", "denke", "denken", "desto",
  "doch", "dh", "d.h",
  "eher", "eigentlich", "eigenschaften", "eindeutig", "einmal", "erfahren",
  "erhalten", "etc", "evtl",
  "finde", "früher", "frage",
  "ganzen", "gar", "geht", "gerade", "gesellschaft", "gestellt", "gewinnen",
  "gewonnen", "geworden", "gibt", "gilt", "große", "großer", "großen",
  "gruppe", "gruppen", "gut", "gute", "gutes",
  "habe", "halten", "heute", "heutigen", "heutzutage", "hohe", "hohen",
  "hoher", "höher",
  "ihre", "ihrem", "ihren", "ihrer",
  "ja", "je", "jedoch", "jemand", "jahre", "jahren", "jahrzehnte", "jetzt",
  "k.a", "kommen", "kommt", "können",
  "lassen", "laufen", "leben", "lebensweisen", "legen", "leute",
  "mal", "man", "manchmal", "mehr", "meinung", "meinungen", "meist",
  "menschen", "mich", "müssen",
  "niemals",
  "obwohl", "oftmals",
  "personen", "position", "positionen",
  "respekt", "respektiert",
  "schon", "sehe", "sehen", "sei", "sicherlich", "siehe_oben", "sowie",
  "stark", "stehen", "steht", "stellen",
  "teil", "teils", "teilweise", "tendenziell",
  "unserer", "usw", "u", "und_co", "u_ä", "u_u", "überhaupt",
  "verbessert", "verfügen", "verlieren", "verliert", "verloren", "viel",
  "vielen",
  "weiß", "weiss", "wenig", "weniger", "will", "wohl", "wollen", "wurde",
  "wurden",
  "z.b", "zb", "z_b"
)

# Terms to compound (per question)
# Each vector holds multi-word expressions that are joined into one token
# before stop words are removed; `*` is a wildcard.

# Winning groups
comp_winning <- c(
  # substantive expressions
  "viel geld",
  "sozial* berufe", "sozial* medien", "social media*",
  "soziale* bereiche*", "soziale* netzwerke*", "soziale* herkunft",
  "gut* *bildung", "besser* bildung", "gute* einkommen", "gute* hause",
  # "don't know" / no-answer phrases
  "weiß nicht", "weiß ich nicht", "weiß es nicht",
  "kein* ahnung", "kann ich nicht sagen",
  "nicht die geringste ahnung", "kene ahnung",
  "weis* nicht",
  "fällt dazu nichts", "fällt mir", "fällt nicht*", "fällt nix",
  "keine angabe", "keine angaben",
  # presumably abbreviations (z.B., u.U., u.ä.) once punctuation is stripped
  "z b", "u u", "u ä"
)

# Losing groups: expressions joined into one token before stop-word removal
# (`*` is a wildcard).
comp_losing <- c(
  # substantive expressions
  "einfache* arbeiter", "einfache* leute",
  "wenig geld",
  "soziale* berufe*", "sozial schwache*",
  "hart arbeitende*",
  "har*z 4", "hartz iv",
  "handwerklich* berufe*", "normale* arbeiter",
  "viel geld", "nicht viel geld", "mal so viel geld",
  "oben herab",
  "öffentliche* dienst",
  # "don't know" / no-answer phrases
  "weiß nicht",
  "kein* ahnung", "kann ich nicht sagen",
  "nicht die geringste ahnung", "kene ahnung",
  "weis* nicht",
  "fällt dazu nichts", "fällt mir", "fällt nicht*", "fällt nix",
  "siehe oben",
  "keine angabe", "keine angaben",
  # presumably abbreviations (z.B., u.U., u.ä.) once punctuation is stripped
  "z b", "u u", "u ä"
)

# High-status groups: expressions joined into one token before stop-word
# removal (`*` is a wildcard).
comp_high <- c(
  # substantive expressions
  "viel geld", "mehr geld",
  "sich * leisten",
  "hohe* einkommen",
  "reich geboren", "keine sorgen",
  "gut* einkommen", "was besseres", "gut* *bildung",
  "finanziell *gesichert", "finanziell unabhängig",
  "über leichen", "vitamin b",
  "höhere* dienst*", "öffentliche* dienst",
  "eigene* vorteil", "auf kosten",
  # "don't know" / no-answer phrases
  "weiß nicht",
  "kein* ahnung", "kann ich nicht sagen",
  "nicht die geringste ahnung", "kene ahnung",
  "weis* nicht",
  "fällt dazu nichts", "fällt mir", "fällt nicht*", "fällt nix",
  "keine angabe", "keine angaben",
  # presumably abbreviations (z.B., u.U., u.ä.) once punctuation is stripped
  "z b", "u u", "u ä"
)

# Medium-status groups: expressions joined into one token before stop-word
# removal (`*` is a wildcard).
comp_medium <- c(
  # substantive expressions
  "wie ich",
  "gut* *bildung",
  "leisten können", "nach oben", "gerade so",
  "mittler* einkommen", "gut* einkommen",
  "runden kommen",
  "sozial engagiert", "hart arbeitend*",
  "genug geld",
  "mit dem strom", "eigene meinung",
  "9 to 5", "klein unternehmer",
  "öffentliche* dienst", "mittlerer dienst", "höhere* dienst",
  # "don't know" / no-answer phrases
  "weiß nicht",
  "kein* ahnung", "kann ich nicht sagen",
  "nicht die geringste ahnung", "kene ahnung",
  "weis* nicht",
  "fällt dazu nichts", "fällt mir", "fällt nicht*", "fällt nix",
  "keine angabe", "keine angaben",
  # presumably abbreviations (z.B., u.U., u.ä.) once punctuation is stripped
  "z b", "u u", "u ä"
)

# Low-status groups: expressions joined into one token before stop-word
# removal (`*` is a wildcard).
# The original list contained "wenig geld", "ohne ausbildung",
# "nicht arbeiten" and "keine arbeit" twice; each pattern is now listed once
# (the set of patterns is unchanged).
comp_low <- c(
  # substantive expressions
  "wenig geld", "einfache* arbeiter", "soziale* berufe", "sozial schwache*",
  "weiße männer",
  "hart arbeitende*", "ältere* menschen", "har*z 4", "hartz iv",
  "handwerklich* berufe",
  "viel geld", "nicht viel geld", "mal so viel geld", "ohne ausbildung",
  "gering* einkommen",
  "keine arbeit", "runden kommen", "nicht bereit", "alg i", "alg ii",
  "arbeitslosengeld ii", "arbeitslosengeld i und ii",
  "gering verdiener", "viele kinder", "schlechte bildung",
  "schlecht bezahlt*", "keine *bildung",
  "mangelnde bildung", "geringe* bildung",
  "nicht arbeiten", "in den mund", "auf der straße",
  "ohne arbeit", "auf kosten",
  # "don't know" / no-answer phrases
  "weiß nicht", "kein* ahnung", "kann ich nicht sagen",
  "nicht die geringste ahnung", "kene ahnung",
  "weis* nicht", "fällt dazu nichts", "fällt mir",
  "fällt nicht*", "fällt nix", "keine angabe", "keine angaben",
  # presumably abbreviations (z.B., u.U., u.ä.) once punctuation is stripped
  "z b", "u u", "u ä"
)

```

## Dictionary

```{r}

# Hierarchical dictionary used to code the open answers.
# Top-level keys: no_content, socioeconomic, education, sociocultural,
# attribute, politics. Values joined with "_" match tokens created by the
# compounding step of the pre-processing chunks.
dict_groups <- dictionary(list(
  # --- "don't know" / no answer -------------------------------------------
  no_content = list(
    dont_know = c("weiß_nicht", "weis_nicht", "weiss_nicht", "weiss_ich_nicht", "weiß_ich_nicht", "weis_ich_nicht", "weiß_es_nicht",#compound
                  "keine_ahnung", "keine_richtige_ahnung", "nicht_die_geringste_ahnung","kene_ahnung",
                  "kann_ich_nicht_sagen", "fällt_dazu_nichts", "fällt_mir",
                  "fällt_nicht", "fallt_nichts", "fällt_nix", "keine_angabe", "keine_angaben",
                  "ka", "k.a")
  ),
  # --- socioeconomic: occupations and wealth ------------------------------
  socioeconomic = list(
    occ = list(
      elite = c("ärzte", "manager", "unternehmer", "wissenschaftler", "banker", "bänker", "banken",
                "geschäftsführer", "selbstständige", "selbstständig", "chefs", "vorstände", "wirtschaftsbosse",
                "juristen", "anwälte", "firmen", "steuerberater", "management", "geschäftsleute",
                "firmenchefs", "geschäftsmänner", "professoren", "lobbyisten", "journalisten"),
      state = list(
        police = c("polizei", "polizisten", "feuerwehr", "polizeibeamte", "soldaten"),
        other_state = c("beamte", "lehrer", "öffentlichen_dienst", "öffentlicher_dienst",
                        "höheren_dienst", "höherer_dienst", "mittlerer_dienst", "mittleren_dienst", "dienst")
      ),
      social = c("soziale_berufe", "sozialen_berufen", "pfleger", "pflegerinnen", "pflegepersonal", "pflegende",
                 "pflegekräfte", "pflege", "medizinisches", "medizinischen", "krankenhäusern", "krankenhäuser",
                 "krankenschwester", "krankenschwestern", "erzieher", "altenpfleger", "sozialen_bereich",
                 "pflegeberufe", "pflegeberufen", "gesundheitswesen", "rettungsdienst", "mediziner",
                 "sozialarbeiter", "sanitäter"),
      production = c("handwerker", "handwerk", "handwerksberufe", "handwerkliche", "handwerkliche_berufe", "handwerklichen_berufen",
                     "arbeiter", "landwirte", "facharbeiter", "meister", "bäcker"),
      tech = c("it", "digital", "internet", "informatiker", "technik"),
      service = c("verkäufer", "einzelhandel", "niedriglohnsektor", "dienstleister",
                  "reinigungskräfte"),
      generic = c("angestellte", "arbeitende", "arbeit", "beruf", "berufe", "berufen", "berufliche",
                  "job", "jobs", "arbeitnehmer", "arbeiten", "normale_arbeiter", "normalen_arbeiter", "fachkräfte",
                  "hart_arbeitende", "hart_arbeitend", "klein_unternehmer", "kleinunternehmer", "erwerbstätige", "arbeitsmarkt",
                  "berufstätige", "berufstätigen", "personal", "beschäftigte", "beschäftigten",
                  "mitarbeiter", "steuerzahler", "9_to_5", "gearbeitet", "angestellt", "wirtschaft"),
      welfare = c("arbeitslose", "arbeitslos", "arbeitslosigkeit", "nicht_arbeiten", "keine_arbeit", "ohne_arbeit",
                  "alg_i", "alg_ii", "arbeitslosengeld_ii", "arbeitslosengeld",
                  "rente", "langzeitarbeitslose"),
      new = list(
        social_media = c( "influencer", "influenzer", "sozialen_medien",
                          "soziale_medien", "social_media"),
        sports = c("sportler", "profisportler", "fußballer", "fussballer", "profifußballer",
                   "profi_sportler", "profi_fußballer", "profi_fussballer", "formel_1"), 
        stars = c("promis", "prominente", "stars", "berühmte", "sänger")
      )), 
    wealth = list(
      rich = c("reiche", "reich", "reichen", "viel_geld", "wohlhabende", "reichtum",
               "wohlhabend", "millionäre", "erben", "geerbt", "vermögen", "vermögend", "luxus",
               "hohes_einkommen", "wohlstand", "unternehmen", "superreiche", "superreichen", "autos",
               "großverdiener", "gutverdienende", "gutes_einkommen", "bonzen", "obere_zehntausend",
               "oberen_zehntausend", "vielverdiener", 
               "finanziell_gesichert", "finanziell_abgesichert", "finanziell_unabhängig"
      ),
      poor = c("obdachlose", "arme", "geringverdiener", "gering_verdiener", "arm", "armut", "ärmer", "wenig_geld",
               "empfänger", "hartz_4", "hartz4", "hartz_iv", "harz_4","harz_iv", "sozial_schwache",
               "sozialhilfeempfänger", "sozialleistungen", "sparen", "runden_kommen", "in_den_mund",
               "schmarotzer", "sozialschmarotzer", "geringem_einkommen", "geringes_einkommen", "amt",
               "schlecht_bezahlte", "prekäre", "existenzminimum"), 
      generic = c("einkommen", "lohn", "gehalt", "geld", "besitz", "besitzen", "finanziell", 
                  "finanziellen", "finanzielles", "steuern", "eigentum", "verdiener",
                  "urlaub", "reisen", "haus", "knete", "auto", "verdienende", "miete",
                  "bezahlt", "bezahlen", "verdienst", "wirtschaftlich", "lebensunterhalt")
    )),
  # --- education -----------------------------------------------------------
  education = list(
    # "bessere_bildung" added: comp_winning compounds "besser* bildung", so in
    # the winning answers the phrase only exists as the single token
    # "bessere_bildung", which the space-separated value "bessere bildung"
    # cannot match. The space-separated value is kept for the questions whose
    # compound list does not join this phrase.
    # NOTE(review): this can change the education counts for the winning
    # question relative to earlier runs.
    high = c("gebildete", "gebildet", "studiert", "studierte", "studierten", "akademiker",
             "guter_ausbildung", "gute_ausbildung", "gute_bildung", "studieren", 
             "bessere bildung", "bessere_bildung", "besserer_bildung", "studenten",
             "abitur", "studierende", "bildungsbürgertum", "studium"),
    low = c("bildungsfern", "ungelernt", "ungelernte", "ungebildet", "ungebildete",
            "schlechte_bildung", "keine_bildung", "mangelnde_bildung", "geringe_bildung",
            "ohne_ausbildung"),
    generic = c("bildung", "ausbildung", "weiterbilden", "lernen", "bildungsgrad",
                "schulbildung", "bildungsniveau", "ausbildungsberufe", "ausgebildete",
                "bildungsstand", "qualifikation", "bildungschancen", "bildungsabschluss",
                "schulabschluss", "schule", "abschluss")
  ),
  # --- sociocultural -------------------------------------------------------
  sociocultural = list(
    gender = c("frauen", "männer", "geschlecht", "frau", "mann", "feministen", "feministinnen", 
               "mütter", 
               "weisse_männer", "weiße_männer" 
    ), 
    # NOTE(review): "menschen" is in the `delete` list, so the two-word value
    # "trans menschen" presumably can never match after token removal - confirm.
    sexuality = c("homosexuelle", "homo", "lgbtq", "lgbtqia", "lgbt", "lgbtx", "schwule", "schwul", "lesben", "lesbisch", 
                  "queere", "queer", "gay",
                  "heterosexuelle", 
                  "transgender", "transsexuell", "transsexuelle", "trans menschen"),
    race = c("migranten", "migrationshintergrund", "ausländer", "zuwanderer",
             "flüchtlinge", "kanacken", "einwanderer", "integration", "geflüchtete", "asylanten",
             "deutsche", "deutschen", "weiße", "deutschland", "volk", "einheimische", "weisse",
             "herkunft", "integrieren", "bipoc", "bpoc", "poc"),
    marginalized = c("minderheiten", "randgruppen", "rand")
  ),
  # --- attributes: personality traits and actions --------------------------
  attribute = list(
    personality = list(
      negative = c("arrogant", "arroganz", "arrogante", "egoistisch", "egoisten", "schlecht", "schlechte", "schlechter",
                   "abgehoben", "überheblich", "egozentrisch", "oberflächlich", "skrupellos",
                   "faul", "faule","lügner", "schwache", "über_leichen", "korrupt", "gier",
                   "betrüger", "auf_kosten", "nicht_bereit", "abhängig", "angeber"),
      neutral = c("normale", "normal", "einfach", "einfache", "einfachen", "normalen", "wie_ich",
                  "neues", "neue", "neu", "alleine", "unauffällig", "durchschnitt", "durchschnittlichem",
                  "durchschnittliches", "sorgen", "klein", "kleine"),
      positive = c("sozial", "soziale", "offen", "weltoffen", "zielstrebig", "fleißig", "fleißige",
                   "zufrieden", "bodenständig", "engagiert", "engagement", "engagieren", "ehrgeizig", "glücklich",
                   "hilfsbereit", "interessiert", "unabhängig", "aktiv", "arbeitssam",
                   "wichtig", "wichtigen", "wichtige", "gemeinwohl", "systemrelevant", "systemrelevante",
                   "freundlich", "lieb", "ehrlich")
    ),
    actions = list(
      effort = c("leisten", "leistungen", "verdienen", "verdient", "versuchen", 
                 "nach_oben", "kämpfen", "mut", "erreichen", "mühe",
                 "erfolg", "erfolgreich", "erledigen", "leistungsträger", "bemühen",
                 "mehrwert"),
      luck = c("glück", "vorteil", "chancen", "chance"),
      community = c("teilen", "kümmern", "dienen", "sozialen", "für_andere",
                    "einsetzen"),
      misfortune = c("pech", "geraten"),
      cognitive = c("wissen"), 
      middle = c("mittelschicht", "mittelstand", "mitte", "mittleren", "mittelständler",
                 "mittelklasse")
    )),
  # --- politics ------------------------------------------------------------
  politics = list(
    power = c("politik", "macht", "politiker", "parteien", "regierung", "politisch",
              "politische", "politischen", "lobbyisten", "lobby"),
    groups = c("querdenker", "konservativ", "konservative", "rechtsradikale",
               "nazis", "schwurbler", "grüne", "grünen", "aktivisten", "rechte", "weltverbesserer",
               "linke"),
    environment = c("umwelt", "umweltaktivisten", "umweltschützer", "veganer",
                    "klimaaktivisten")
  )
))



```

## Pre-processing text

#### Winning groups

```{r}

# 1. remove short answers (keep answers with at least three characters)

df_p <- df_p %>% 
  mutate(nchar_win = nchar(rg_winning)) 

df_p_keepwin <- filter(df_p, nchar_win >= 3)

# 2. corpus, tokenization, lower case

corp_rgwin <- corpus(df_p_keepwin, text_field = "rg_winning")

toks_win <- tokens(corp_rgwin, remove_punct = TRUE, remove_symbols = TRUE, padding = FALSE) %>% 
  tokens_tolower()

# 3. compound those expressions that have meaning (see above)

toks_win_comp <- tokens_compound(toks_win, pattern = phrase(comp_winning)) 

# 4. stopwords and other terms to delete
# padding = TRUE leaves an empty "" token in place of every removed term

toks_win_comp <- toks_win_comp %>% 
  tokens_remove(pattern = c(stopwords("de", source = "marimo"), delete), 
                valuetype = "fixed", padding = TRUE)

# 5. text matrix without the empty padding feature
# The pads are dropped explicitly with remove_padding = TRUE instead of
# relying on the "" feature exceeding a frequency cap. The original cap of
# 5000 is kept so that results stay identical to earlier runs.

dfmat_win <- toks_win_comp %>% 
  dfm(remove_padding = TRUE) %>% 
  dfm_trim(max_termfreq = 5000, termfreq_type = "count")

```

#### Losing groups

```{r}

# 1. remove short answers (keep answers with at least three characters)

df_p <- df_p %>% 
  mutate(nchar_lose = nchar(rg_losing)) 

df_p_keeplose <- filter(df_p, nchar_lose >= 3)

# 2. corpus, tokenization, lower case

corp_rglose <- corpus(df_p_keeplose, text_field = "rg_losing")

# tokens

toks_lose <- tokens(corp_rglose, remove_punct = TRUE, remove_symbols = TRUE, padding = FALSE) %>% 
  tokens_tolower()

# 3. compound those expressions that have meaning

toks_lose_comp <- tokens_compound(toks_lose, pattern = phrase(comp_losing))

# 4. stopwords and other terms to delete
# padding = TRUE leaves an empty "" token in place of every removed term

toks_lose_comp <- toks_lose_comp %>% 
  tokens_remove(pattern = c(stopwords("de", source = "marimo"), delete), 
                valuetype = "fixed", padding = TRUE)

# 5. text matrix without the empty padding feature
# The pads are dropped explicitly with remove_padding = TRUE instead of
# relying on the "" feature exceeding a frequency cap. The original cap of
# 5000 is kept so that results stay identical to earlier runs.

dfmat_lose <- toks_lose_comp %>% 
  dfm(remove_padding = TRUE) %>% 
  dfm_trim(max_termfreq = 5000, termfreq_type = "count")

```


#### High groups

```{r}
# 1. remove short answers (keep answers with at least three characters)

df_p <- df_p %>% 
  mutate(nchar_high = nchar(rg_high)) 

df_p_keephigh <- filter(df_p, nchar_high >= 3)

# 2. corpus, tokenization, lower case

corp_rghigh <- corpus(df_p_keephigh, text_field = "rg_high")

# tokens

toks_high <- tokens(corp_rghigh, remove_punct = TRUE, remove_symbols = TRUE, padding = FALSE) %>% 
  tokens_tolower()

# 3. compound those expressions that have meaning:

toks_high_comp <- tokens_compound(toks_high, pattern = phrase(comp_high))

# 4. stopwords and other terms to delete
# In addition to the shared list, "rand", "oben" and "oberen" are removed for
# this question. padding = TRUE leaves an empty "" token per removed term.

toks_high_comp <- toks_high_comp %>% 
  tokens_remove(pattern = c(stopwords("de", source = "marimo"), delete,
                            "rand", "oben", "oberen"), 
                valuetype = "fixed", padding = TRUE)

# 5. text matrix without the empty padding feature
# The pads are dropped explicitly with remove_padding = TRUE instead of
# relying on the "" feature exceeding a frequency cap. The original cap of
# 5000 is kept so that results stay identical to earlier runs.

dfmat_high <- toks_high_comp %>% 
  dfm(remove_padding = TRUE) %>% 
  dfm_trim(max_termfreq = 5000, termfreq_type = "count")
```


#### Medium groups

```{r}

# 1. remove short answers (keep answers with at least three characters)

df_p <- df_p %>% 
  mutate(nchar_med = nchar(rg_med)) 

df_p_keepmed <- filter(df_p, nchar_med >= 3)

# 2. corpus, tokenization, lower case

corp_rgmed <- corpus(df_p_keepmed, text_field = "rg_med")

# tokens

toks_med <- tokens(corp_rgmed, remove_punct = TRUE, remove_symbols = TRUE, padding = FALSE) %>% 
  tokens_tolower()

# 3. compound

toks_med_comp <- tokens_compound(toks_med, pattern = phrase(comp_medium))

# 4. stopwords and other terms to delete
# In addition to the shared list, "mitte", "mittlere" and "5" are removed for
# this question. padding = TRUE leaves an empty "" token per removed term.

toks_med_comp <- toks_med_comp %>% 
  tokens_remove(pattern = c(stopwords("de", source = "marimo"), delete,
                            "mitte", "mittlere", "5"), 
                valuetype = "fixed", padding = TRUE)

# 5. text matrix without the empty padding feature
# The pads are dropped explicitly with remove_padding = TRUE instead of
# relying on the "" feature exceeding a frequency cap. The original cap of
# 5000 is kept so that results stay identical to earlier runs.

dfmat_med <- toks_med_comp %>% 
  dfm(remove_padding = TRUE) %>% 
  dfm_trim(max_termfreq = 5000, termfreq_type = "count")

```


#### Low groups

```{r}

# 1. remove short answers (keep answers with at least three characters)

df_p <- df_p %>% 
  mutate(nchar_low = nchar(rg_low)) 

df_p_keeplow <- filter(df_p, nchar_low >= 3)

# 2. corpus, tokenization, lower case

corp_rglow <- corpus(df_p_keeplow, text_field = "rg_low")

# tokens

toks_low <- tokens(corp_rglow, remove_punct = TRUE, remove_symbols = TRUE, padding = FALSE) %>% 
  tokens_tolower()

# 3. compound

toks_low_comp <- tokens_compound(toks_low, pattern = phrase(comp_low))

# 4. stopwords and other terms to delete
# In addition to the shared list, "rand", "unteren" and "oberen" are removed
# for this question. padding = TRUE leaves an empty "" token per removed term.

toks_low_comp <- toks_low_comp %>% 
  tokens_remove(pattern = c(stopwords("de", source = "marimo"), delete,
                            "rand", "unteren", "oberen"), 
                valuetype = "fixed", padding = TRUE)

# 5. text matrix without the empty padding feature
# The pads are dropped explicitly with remove_padding = TRUE instead of
# relying on the "" feature exceeding a frequency cap. The original cap of
# 5000 is kept so that results stay identical to earlier runs.

dfmat_low <- toks_low_comp %>% 
  dfm(remove_padding = TRUE) %>% 
  dfm_trim(max_termfreq = 5000, termfreq_type = "count")
```




# Figure 2

```{r, fig.height=10, fig.width=8}

# for plots: dictionary as df and then merge to textstat_frequency object
# One row per dictionary value with its top-level key (dimension) and
# second-level key (subdimension). Only the three substantive dimensions are
# kept, ordered socioeconomic < education < sociocultural.

dict_df <- dict_groups %>% 
  unlist() %>% 
  # strip all digits from the element names (presumably the index suffixes
  # that unlist() appends to repeated names)
  tibble(dimension = gsub("[0-9]", "", names(.)),
         feature = .) %>% 
  # keep the first two key levels only; deeper levels are dropped on purpose
  # (extra = "drop" does this without the "expected 2 pieces" warning)
  separate(col = dimension, into = c("dimension", "subdimension"),
           sep = "\\.", extra = "drop") %>% 
  filter(dimension %in% c("socioeconomic", "sociocultural", "education")) %>% 
  mutate(
    dimension = fct_relevel(factor(dimension), "socioeconomic", "education", "sociocultural")
  )

# HIGH

# 20 most frequent features in the answers about high-status groups
high_dim <- textstat_frequency(dfmat_high, n = 20)

# English labels, hard-coded in plotting order: they are placed at x = 1:20,
# where x = 1 is the least frequent of the 20 features. They must be revised
# whenever the data or the pre-processing change.
translations_high <- c("wealth", "wealth", "the wealthy", "earn", "aloof", "influence", "academics", "to work", "wealthy", "egoistic", "arrogant", "power", "doctors", "entrepreneurs",  "managers", "rich", "a lot of money", "money", "the rich", "politicians")

# add the dictionary dimension of each feature (NA if it is not in dict_df)
high_dim <- left_join(high_dim, dict_df, by = "feature")

plot_high_f <- high_dim %>%  
  ggplot(aes(x = reorder(feature, frequency), y = frequency, fill = dimension)) +
  # linewidth (not the deprecated size) sets the bar outline width
  geom_col(width = 0.7, linewidth = 0.2, color = "black") +
  coord_flip() +
  scale_y_continuous(limits = c(0, 410)) +
  # named colours keep the dimension-to-colour mapping fixed even when one
  # dimension does not occur among the plotted features
  scale_fill_manual(values = c(socioeconomic = "gray1",
                               education = "gray40",
                               sociocultural = "gray72"),
                    na.value = "gray99",
                    name = NULL,
                    na.translate = FALSE) +
  annotate(geom = "text", x = 1:20, y = 410, size = 2.5, color = "gray25", 
           label = translations_high, hjust = 1) +
  labs(x = NULL, y = "Frequency", subtitle = "Static: high") +
  theme(legend.position = "bottom",
        axis.text.y = element_text(size = 8))

# LOW

# 20 most frequent features in the answers about low-status groups
low_dim <- textstat_frequency(dfmat_low, n = 20)

# English labels, hard-coded in plotting order: they are placed at x = 1:20,
# where x = 1 is the least frequent of the 20 features. They must be revised
# whenever the data or the pre-processing change.
translations_low <- c("lazy", "work", "low-income earners", "education", "income", "state", "poor", "the sick", "welfare recipients", "hartz 4", "money", "to work", "single parents", "recipients", "the poor", "migrants", "workers", "homeless", "pensioners", "the unemployed")

# add the dictionary dimension of each feature (NA if it is not in dict_df)
low_dim <- left_join(low_dim, dict_df, by = "feature")

plot_low_f <- low_dim %>%  
  ggplot(aes(x = reorder(feature, frequency), y = frequency, fill = dimension)) +
  # linewidth (not the deprecated size) sets the bar outline width
  geom_col(width = 0.7, linewidth = 0.2, color = "black") +
  coord_flip() +
  # named colours keep the dimension-to-colour mapping fixed even when one
  # dimension does not occur among the plotted features
  scale_fill_manual(values = c(socioeconomic = "gray1",
                               education = "gray40",
                               sociocultural = "gray72"),
                    na.value = "gray99",
                    name = NULL,
                    na.translate = FALSE) +
  annotate(geom = "text", x = 1:20, y = 460, size = 2.5, color = "gray25", 
           label = translations_low, hjust = 1) +
  labs(x = NULL, y = "Frequency", subtitle = "Static: low") +
  theme(legend.position = "bottom",
        axis.text.y = element_text(size = 8))

# Stack the two static-question panels (high above low), 9 cm each, and draw them.
plots_freq_static <- arrangeGrob(
  grobs = list(plot_high_f, plot_low_f),
  nrow = 2,
  heights = unit(c(9, 9), "cm")
)

grid.arrange(plots_freq_static)

```

# Figure 3

```{r, fig.height=10, fig.width=8}

# Winning

# 20 most frequent features in the answers about winning groups
win_dim <- textstat_frequency(dfmat_win, n = 20)

# English labels, hard-coded in plotting order: they are placed at x = 1:20,
# where x = 1 is the least frequent of the 20 features. They must be revised
# whenever the data or the pre-processing change.
translations_win <- c("perform", "education", "occupations", "children", "time", "graduates", "don't know", "work", "migrants", "politicians", "academics", "no idea", "homosexuals", "doctors", "influencers", "to work", "the rich", "money", "craftsmen", "women")

# add the dictionary dimension of each feature (NA if it is not in dict_df)
win_dim <- left_join(win_dim, dict_df, by = "feature")

plot_win_f <- win_dim %>% 
  ggplot(aes(x = reorder(feature, frequency), y = frequency, fill = dimension)) +
  # linewidth (not the deprecated size) sets the bar outline width
  geom_col(width = 0.7, linewidth = 0.2, color = "black") +
  coord_flip() +
  scale_y_continuous(limits = c(0, 290)) +
  # named colours keep the dimension-to-colour mapping fixed even when one
  # dimension does not occur among the plotted features
  scale_fill_manual(values = c(socioeconomic = "gray1",
                               education = "gray40",
                               sociocultural = "gray72"),
                    na.value = "gray99",
                    name = NULL,
                    na.translate = FALSE) +
  annotate(geom = "text", x = 1:20, y = 290, size = 2.5, color = "gray25", 
           label = translations_win, hjust = 1) +
  labs(x = NULL, y = "Frequency", subtitle = "Dynamic: winning") +
  theme(legend.position = "bottom",
        axis.text.y = element_text(size = 8))

# Losing

# 20 most frequent features in the answers about losing groups
lose_dim <- textstat_frequency(dfmat_lose, n = 20)

# English labels, hard-coded in plotting order: they are placed at x = 1:20,
# where x = 1 is the least frequent of the 20 features. They must be revised
# whenever the data or the pre-processing change.
translations_lose <- c("money", "no idea", "men", "migrants", "civil servants", "doctors", "occupations", "middle class", "don't know", "the elderly", "the unemployed", "pensioners", "policemen", "work", "workers", "to work", "police", "teachers", "craftsmen", "politicians")

# add the dictionary dimension of each feature (NA if it is not in dict_df)
lose_dim <- left_join(lose_dim, dict_df, by = "feature")

plot_lose_f <- lose_dim %>% 
  ggplot(aes(x = reorder(feature, frequency), y = frequency, fill = dimension)) +
  # linewidth (not the deprecated size) sets the bar outline width
  geom_col(width = 0.7, linewidth = 0.2, color = "black") +
  coord_flip() +
  # Named colours replace the two positional values (with "gray40" commented
  # out) used before: each dimension keeps its colour no matter which
  # dimensions occur among the plotted features.
  scale_fill_manual(values = c(socioeconomic = "gray1",
                               education = "gray40",
                               sociocultural = "gray72"),
                    na.value = "gray99",
                    name = NULL,
                    na.translate = FALSE) +
  annotate(geom = "text", x = 1:20, y = 400, size = 2.5, color = "gray25", 
           label = translations_lose, hjust = 1) +
  labs(x = NULL, y = "Frequency", subtitle = "Dynamic: losing") +
  theme(legend.position = "bottom",
        axis.text.y = element_text(size = 8))

# Stack the two dynamic-question panels (winning above losing), 9 cm each, and draw them.
plots_freq_dyn <- arrangeGrob(
  grobs = list(plot_win_f, plot_lose_f),
  nrow = 2,
  heights = unit(c(9, 9), "cm")
)

grid.arrange(plots_freq_dyn)
```

# Table 1

```{r, cache = T}

# Apply dictionary
# For each question the tokens are replaced by the dictionary keys they match
# and counted per answer (one row per answer, one column per key).

# Dynamic questions: keep the respondent id and mark winning / losing.
df_win_dict <- toks_win_comp %>% 
  tokens_lookup(dict_groups, valuetype = "glob") %>% 
  dfm() %>% 
  convert(to = "data.frame") %>% 
  mutate(ResponseId = docvars(toks_win_comp, "ResponseId")) %>% 
  select(-doc_id) %>% 
  mutate(Question = "dynamic", Type = "winning")

df_lose_dict <- toks_lose_comp %>% 
  tokens_lookup(dict_groups, valuetype = "glob") %>% 
  dfm() %>% 
  convert(to = "data.frame") %>% 
  mutate(ResponseId = docvars(toks_lose_comp, "ResponseId")) %>% 
  select(-doc_id) %>% 
  mutate(Question = "dynamic", Type = "losing")

# Static questions: only the question type is recorded (doc_id is kept).
df_high_dict <- toks_high_comp %>% 
  tokens_lookup(dict_groups, valuetype = "glob") %>% 
  dfm() %>% 
  convert(to = "data.frame") %>% 
  mutate(Question = "static")

df_med_dict <- toks_med_comp %>% 
  tokens_lookup(dict_groups, valuetype = "glob") %>% 
  dfm() %>% 
  convert(to = "data.frame") %>% 
  mutate(Question = "static")

df_low_dict <- toks_low_comp %>% 
  tokens_lookup(dict_groups, valuetype = "glob") %>% 
  dfm() %>% 
  convert(to = "data.frame") %>% 
  mutate(Question = "static")

# Stack the coded answers of all five questions and summarise them per answer.
# sum_*  : number of dictionary matches per top-level dimension
# eco ... dk : 1 if the dimension is mentioned at least once, 0 otherwise
# The per-answer sums use vectorised rowSums() instead of rowwise() /
# c_across(), which gives the same values without a row-by-row loop.
# as_tibble() keeps the result a tibble, as it was after rowwise() / ungroup().
df_dyn_stat <- bind_rows(df_win_dict, df_lose_dict, df_high_dict, df_med_dict, df_low_dict) %>% 
  as_tibble() %>% 
  mutate(
    sum_eco = rowSums(across(starts_with("socioeconomic")), na.rm = TRUE), # sum per answer
    sum_cult = rowSums(across(starts_with("sociocultural")), na.rm = TRUE), # sum per answer
    sum_edu = rowSums(across(starts_with("education")), na.rm = TRUE), # sum per answer
    sum_pol = rowSums(across(starts_with("politics")), na.rm = TRUE), # sum per answer
    sum_attr = rowSums(across(starts_with("attribute")), na.rm = TRUE), # sum per answer
    sum_dk = rowSums(across(starts_with("no_content")), na.rm = TRUE), # sum per answer
    eco = if_else(sum_eco > 0, 1, 0), # binary: 1 is eco. mentioned, 0 not
    cult = if_else(sum_cult > 0, 1, 0), # binary: 1 is cult. mentioned, 0 not
    edu = if_else(sum_edu > 0, 1, 0), # binary: 1 is edu. mentioned, 0 not
    pol = if_else(sum_pol > 0, 1, 0),
    attr = if_else(sum_attr > 0, 1, 0),
    dk = if_else(sum_dk > 0, 1, 0)
  )

# Table
# Number and share of answers that mention each dimension, by question type
# (dynamic vs. static), reshaped to one row per dimension and question.

df_dyn_stat %>% 
  group_by(Question) %>% 
  summarise(
    `Socioeconomic.N` = sum(eco == 1),
    `Socioeconomic.%` = mean(eco) * 100,
    `Sociocultural.N` = sum(cult == 1),
    `Sociocultural.%` = mean(cult) * 100,
    `Education.N` = sum(edu == 1),
    `Education.%` = mean(edu) * 100,
    `Total.N` = n(),
    `Total.%` = 100,
    .groups = "drop"
  ) %>% 
  # "<Dimension>.<N|%>" column names become a Dimension column plus N and % columns
  pivot_longer(
    cols = -Question,
    names_to = c("Dimension", ".value"),
    names_sep = "\\."
  ) %>% 
  arrange(
    factor(Dimension, levels = c("Socioeconomic", "Sociocultural", "Education", "Total")),
    desc(Question)
  ) %>% 
  relocate(Dimension) %>% 
  kbl(
    caption = "Salience of dictionary dimensions (percentage of answers mentioning the dimension by question type)",
    booktabs = TRUE,
    digits = 1
  ) %>% 
  collapse_rows(columns = 1:2, latex_hline = "major") %>% 
  kable_styling()

```


# Figure 4

```{r, fig.height=8, fig.width=10}

#translations
# English axis labels, hard-coded in the order of the features after sorting
# by chi2 (lowest first); must be revised whenever the data change.
translations_winatt <- c("politicians", "the rich", "don't care", "earnings", "top", "don't know", "immigrants", "Greens", "no idea", "don't know", "to care", "income", "worries", "arrogant", "wealth", "foreigners", "cars", "proud", "worked", "asylum seekers", "celebrities", "skilled workers", "computer scientists", "own", "scientists", "brought", "social occupations", "community", "gender", "open", "disability", "gender inclusive form", "migration background", "occupations", "cosmopolitan", "influencers", "social", "environmentalists", "LGBTQ", "women")

# compute keyness and merge to dictionary to categorize terms
# Keyness of each feature for the "progressive" group (target) against the
# other galtan_group; the 20 largest absolute chi2 values are kept per side.
tstat_key_win_att <- textstat_keyness(dfm_group(dfmat_win, groups = dfmat_win$galtan_group),
                                  target = "progressive") %>% 
  left_join(dict_df, by = c("feature")) %>% 
  mutate(
    targetgroup = if_else(chi2 >= 0, "target", "reference"),
    chi_abs = abs(chi2) 
  ) %>% 
  group_by(targetgroup) %>% 
  slice_max(chi_abs, n = 20, with_ties = FALSE) %>% 
  filter(!is.nan(chi2)) %>% 
  # drop the grouping so later steps do not silently operate per group
  ungroup()
  
# plot

ggplot(data = tstat_key_win_att, aes(x = reorder(feature, chi2), y = chi2, fill = dimension,
                                     label = feature)) + 
  geom_col(width = 0.7, linewidth = 0.2, color = "black") +
  # original (German) feature next to each bar, pushed away from zero
  geom_text(color = "gray25", hjust = "outward", 
            position = position_nudge_center(y = 0.4,
                                             direction = "split")
    ) +
  coord_flip() +
  scale_x_discrete(labels = translations_winatt) +
  scale_y_continuous(
    limits = c(-23, 30)
    ) +
  # named colours keep the dimension-to-colour mapping fixed even when one
  # dimension does not occur among the plotted features
  scale_fill_manual(values = c(socioeconomic = "gray1",
                               education = "gray40",
                               sociocultural = "gray72"),
                    na.value = "gray99",
                    name = NULL, na.translate = FALSE) +
  labs(x = NULL, y = "Chi2") +
  theme(legend.position = "bottom",
        axis.text.y = element_text(size = 8)) +
  # group captions below the first and above the last bar
  annotate(geom = "text", x = c(-1, 42), y = c(-10, 15), size = 4, color = "black",
           label = c("social conservatives", "progressives")) +
  expand_limits(x = c(-2, 43))


```

# Figure 5


```{r, fig.height=8, fig.width=10}

# translations: English axis labels for the keyness plot of the "losing"
# answers. The vector is unnamed, so it has to be kept in the same order as the
# plotted terms by hand.
translations_loseatt <- c("Germans", "pensioners", "country", "foreigners", "mid tier", "the honest", "don't know", "the poor", "citizens", "to try", "employees", "to speak out", "Germans", "taken", "no", "low-income earners", "important", "heavy", "the normal", "serious", "COVID skeptics", "to reach", "active", "conservative", "illness", "performance", "lawyers", "big", "nazis", "social occupations", "schooling","crafts", "migration background", "situation", "academics", "social occupations", "teachers", "opinions", "bankers", "gender inclusive form")

# compute keyness (target: "progressive") on the dfm grouped by galtan_group
# and merge to dictionary to categorize terms
tstat_key_lose_att <- dfmat_lose %>%
  dfm_group(groups = dfmat_lose$galtan_group) %>%
  textstat_keyness(target = "progressive") %>%
  left_join(dict_df, by = "feature") %>%
  mutate(
    # side of the plot a term belongs to, by the sign of chi2
    targetgroup = if_else(chi2 >= 0, "target", "reference"),
    chi_abs = abs(chi2)
  ) %>%
  # keep the 20 terms with the largest absolute chi2 on each side
  group_by(targetgroup) %>%
  slice_max(chi_abs, n = 20, with_ties = FALSE) %>%
  # grouping is only needed for the slice; do not leak it downstream
  ungroup() %>%
  filter(!is.nan(chi2))
  
# plot: horizontal keyness bars, filled by dictionary dimension, with the
# original term printed next to each bar and the translation on the axis

ggplot(
  data = tstat_key_lose_att,
  aes(
    x = reorder(feature, chi2),
    y = chi2,
    fill = dimension,
    label = feature
  )
) +
  # layers
  geom_col(width = 0.7, linewidth = 0.2, color = "black") +
  geom_text(
    color = "gray25",
    hjust = "outward",
    position = position_nudge_center(y = 0.4, direction = "split")
  ) +
  # captions for the two sides, placed outside the range of the 40 bars
  annotate(
    geom = "text",
    x = c(-1, 42), y = c(-10, 7.5),
    size = 4, color = "black",
    label = c("social conservatives", "progressives")
  ) +
  # make room on the term axis for the captions above
  expand_limits(x = c(-2, 43)) +
  # coordinates and scales
  coord_flip() +
  scale_x_discrete(labels = translations_loseatt) +
  scale_y_continuous(limits = c(-28, 33)) +
  scale_fill_manual(
    values = c("gray1", "gray40", "gray72"),
    na.value = "gray99",
    name = NULL,
    # terms without a dictionary dimension get no legend entry
    na.translate = FALSE
  ) +
  # labels and theme
  labs(x = NULL, y = "Chi2") +
  theme(
    legend.position = "bottom",
    axis.text.y = element_text(size = 8)
  )

```

# Figure 6

```{r, fig.height=10, fig.width=8}
# merge dictionary results to dataframe with respondent vars and derive 0/1
# indicators for whether an answer matched a dictionary dimension at least once

# Adds the four mention indicators to an already joined data frame.
add_mention_flags <- function(joined) {
  # 1 if the counts summed over all columns starting with `prefixes` reach 1
  mentioned <- function(prefixes) {
    hits <- rowSums(select(joined, starts_with(prefixes)))
    if_else(hits >= 1, 1, 0)
  }

  mutate(
    joined,
    sociocultural = mentioned("sociocultural."),
    socioeconomic = mentioned("socioeconomic."),
    soccult_race = if_else(sociocultural.race >= 1, 1, 0),
    soccult_gendersex = mentioned(c("sociocultural.gender", "sociocultural.sexuality"))
  )
}

df_p_windict <- add_mention_flags(
  left_join(df_win_dict, df_p, by = "ResponseId")
)

df_p_losedict <- add_mention_flags(
  left_join(df_lose_dict, df_p, by = "ResponseId")
)

# Panel A
# Binomial GLMs of the three mention indicators on galtan_group plus controls.
# Models ending in "w3" are fitted on df_p_windict, models ending in "l3" on
# df_p_losedict. The glm() calls are spelled out one by one so that each
# fitted object stores a self-contained call.

# overall sociocultural
model_soccultw3 <- glm(
  sociocultural ~ galtan_group + att_socialben + age + gender_fm + edu + migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_windict
)

model_soccultl3 <- glm(
  sociocultural ~ galtan_group + att_socialben + age + gender_fm + edu + migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_losedict
)

# race
model_racew3 <- glm(
  soccult_race ~ galtan_group + att_socialben + age + gender_fm + edu + migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_windict
)

model_racel3 <- glm(
  soccult_race ~ galtan_group + att_socialben + age + gender_fm + edu + migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_losedict
)

# gender and sexuality
model_gsw3 <- glm(
  soccult_gendersex ~ galtan_group + att_socialben + age + gender_fm + edu + migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_windict
)

model_gsl3 <- glm(
  soccult_gendersex ~ galtan_group + att_socialben + age + gender_fm + edu + migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_losedict
)

# Average predictions by galtan_group for each Panel A model, tagged with the
# question and outcome they belong to (used for facets and the x-axis below)

# all sociocultural
pred_scl <- mutate(
  avg_predictions(model_soccultl3, by = "galtan_group"),
  question = "Mentioned as losing groups",
  outcome = "overall\nsociocultural"
)
pred_scw <- mutate(
  avg_predictions(model_soccultw3, by = "galtan_group"),
  question = "Mentioned as winning groups",
  outcome = "overall\nsociocultural"
)

# race
pred_scrl <- mutate(
  avg_predictions(model_racel3, by = "galtan_group"),
  question = "Mentioned as losing groups",
  outcome = "race"
)
pred_scrw <- mutate(
  avg_predictions(model_racew3, by = "galtan_group"),
  question = "Mentioned as winning groups",
  outcome = "race"
)

# gender
pred_scgl <- mutate(
  avg_predictions(model_gsl3, by = "galtan_group"),
  question = "Mentioned as losing groups",
  outcome = "gender and\nsexuality"
)
pred_scgw <- mutate(
  avg_predictions(model_gsw3, by = "galtan_group"),
  question = "Mentioned as winning groups",
  outcome = "gender and\nsexuality"
)

# combine: stack all six sets, then fix the display order of outcomes and
# reverse the order of the question factor for the facets

pred_cult <- bind_rows(pred_scl, pred_scw, pred_scrl, pred_scrw, pred_scgl, pred_scgw)
pred_cult <- mutate(
  pred_cult,
  outcome = fct_relevel(
    factor(outcome),
    "overall\nsociocultural", "race", "gender and\nsexuality"
  ),
  question = fct_rev(factor(question))
)

# point estimates with confidence intervals, dodged by subgroup
gal <- ggplot(pred_cult, aes(outcome, estimate, color = galtan_group)) +
  geom_pointrange(
    aes(ymin = conf.low, ymax = conf.high),
    position = position_dodge(width = 0.5)
  ) +
  facet_wrap(~question) +
  scale_color_grey(start = 0.1, end = 0.7, name = "Subgroup") +
  labs(
    subtitle = "A) By sociocultural attitudes",
    x = NULL,
    y = "Predicted probability of mentioning groups"
  ) +
  theme(legend.position = "bottom")

# Panel B
# Binomial GLMs of the three mention indicators on galtan_att, gender_edu and
# controls. Models ending in "w4" are fitted on df_p_windict, models ending in
# "l4" on df_p_losedict. The glm() calls are spelled out one by one so that
# each fitted object stores a self-contained call.

# overall sociocultural
model_soccultw4 <- glm(
  sociocultural ~ galtan_att + att_socialben + age + gender_edu + migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_windict
)

model_soccultl4 <- glm(
  sociocultural ~ galtan_att + att_socialben + age + gender_edu + migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_losedict
)

# race
model_racew4 <- glm(
  soccult_race ~ galtan_att + att_socialben + age + gender_edu + migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_windict
)

model_racel4 <- glm(
  soccult_race ~ galtan_att + att_socialben + age + gender_edu + migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_losedict
)

# gender and sexuality
model_gsw4 <- glm(
  soccult_gendersex ~ galtan_att + att_socialben + age + gender_edu + migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_windict
)

model_gsl4 <- glm(
  soccult_gendersex ~ galtan_att + att_socialben + age + gender_edu + migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_losedict
)

# Average predictions by gender_edu for each Panel B model, tagged with the
# question and outcome they belong to. The pred_* objects from Panel A are
# overwritten here.

# all sociocultural
pred_scl <- mutate(
  avg_predictions(model_soccultl4, by = "gender_edu"),
  question = "Mentioned as losing groups",
  outcome = "overall\nsociocultural"
)
pred_scw <- mutate(
  avg_predictions(model_soccultw4, by = "gender_edu"),
  question = "Mentioned as winning groups",
  outcome = "overall\nsociocultural"
)

# race
pred_scrl <- mutate(
  avg_predictions(model_racel4, by = "gender_edu"),
  question = "Mentioned as losing groups",
  outcome = "race"
)
pred_scrw <- mutate(
  avg_predictions(model_racew4, by = "gender_edu"),
  question = "Mentioned as winning groups",
  outcome = "race"
)

# gender
pred_scgl <- mutate(
  avg_predictions(model_gsl4, by = "gender_edu"),
  question = "Mentioned as losing groups",
  outcome = "gender and\nsexuality"
)
pred_scgw <- mutate(
  avg_predictions(model_gsw4, by = "gender_edu"),
  question = "Mentioned as winning groups",
  outcome = "gender and\nsexuality"
)

# combine: stack all six sets, then fix the display order of outcomes and
# reverse the order of the question factor for the facets

pred_cult <- bind_rows(pred_scl, pred_scw, pred_scrl, pred_scrw, pred_scgl, pred_scgw)
pred_cult <- mutate(
  pred_cult,
  outcome = fct_relevel(
    factor(outcome),
    "overall\nsociocultural", "race", "gender and\nsexuality"
  ),
  question = fct_rev(factor(question))
)

# point estimates with confidence intervals, dodged by gender/education group
ge <- ggplot(pred_cult, aes(outcome, estimate, color = gender_edu)) +
  geom_pointrange(
    aes(ymin = conf.low, ymax = conf.high),
    position = position_dodge(width = 0.5)
  ) +
  facet_wrap(~question) +
  scale_color_grey(start = 0, end = 0.8, name = NULL) +
  labs(
    subtitle = "B) By gender and education",
    x = NULL,
    y = "Predicted probability of mentioning groups"
  ) +
  theme(legend.position = "bottom")

# stack panel A above panel B, 12 cm each, and draw the combined figure
fig6 <- arrangeGrob(
  gal, ge,
  nrow = 2,
  heights = unit(c(12, 12), "cm")
)

grid.arrange(fig6)
```

# Appendix

## Table A1

```{r}
# nicer labels: English factor labels for vote choice and gender
df1 <- df_p %>%
  # party blocks (left / right / other or none), right party first
  mutate(
    party_block_nice = party_vote %>%
      fct_collapse(
        "left party" = c(
          "Sozialdemokratische Partei Deutschlands (SPD)",
          "Bündnis 90/Die Grünen",
          "Die Linke"
        ),
        "right party" = c(
          "Christlich Demokratische Union/Christlich-Soziale Union (CDU/CSU)",
          "Freie Demokratische Partei (FDP)",
          "Alternative für Deutschland (AfD)"
        ),
        "other or none" = c("anderer Partei", "keiner Partei")
      ) %>%
      fct_relevel("right party", "left party", "other or none")
  ) %>%
  # individual parties with English names; "keiner Partei" is recoded to NULL,
  # which removes that level
  mutate(
    party_vote_nice = party_vote %>%
      fct_recode(
        "Social democrats (SPD)" = "Sozialdemokratische Partei Deutschlands (SPD)",
        "Green party" = "Bündnis 90/Die Grünen",
        "Left party (LINKE)" = "Die Linke",
        "Christian democrats (CDU/CSU)" = "Christlich Demokratische Union/Christlich-Soziale Union (CDU/CSU)",
        "Liberal party (FDP)" = "Freie Demokratische Partei (FDP)",
        "Alternative for Germany (AfD)" = "Alternative für Deutschland (AfD)",
        "other party" = "anderer Partei",
        NULL = "keiner Partei"
      ) %>%
      fct_relevel(
        "Social democrats (SPD)", "Christian democrats (CDU/CSU)",
        "Green party", "Liberal party (FDP)", "Alternative for Germany (AfD)",
        "Left party (LINKE)", "other party"
      )
  ) %>%
  # gender labels, ordered woman / man / nonbinary
  mutate(
    gender_nice = gender %>%
      fct_recode(
        woman = "female",
        man = "male",
        nonbinary = "nonbinary"
      ) %>%
      fct_relevel("woman", "man", "nonbinary")
  )


# columns shown in the sample description, renamed to their table labels
sample <- select(
  df1,
  Gender = gender_nice,
  Income = income_group_eq,
  Education = edu,
  Age = age_group,
  `Migration background` = migrationb,
  Residence = urban,
  `Vote choice` = party_vote_nice
)

# N and percentage per category of each variable, plus an overall row (the 1)
datasummary(
  Gender + Education + Income + Age + `Migration background` + Residence + `Vote choice` +
    1 ~ N + Percent(),
  data = sample,
  title = "Sample characteristics",
  notes = c(
                   "Vote choice: party voted in 2021 (or would have voted, if non-citizen);
                   Residence: selfreported degree of urbanity/rurality;
                   Migration background: respondent or at least one parent born abroad;
                   Income: equivalized according to household size."
  )
  #output = "latex"
)
```

## Figure A1


#### Wave 1 Winning groups

```{r}

# 1. remove short answers: keep wave 1 rows whose answer has at least 3 characters

df_p <- mutate(df_p, nchar_win = nchar(rg_winning))

df_p_keepwinw1 <- filter(df_p, wave == "w1", nchar_win >= 3)

# 2. corpus, tokenization, lower case

corp_rgwinw1 <- corpus(df_p_keepwinw1, text_field = "rg_winning")

toks_winw1 <- tokens_tolower(
  tokens(corp_rgwinw1, remove_punct = TRUE, remove_symbols = TRUE, padding = FALSE)
)

# 3. compound those expressions that have meaning (see above), then
# 4. remove stopwords and other terms to delete, leaving a pad in their place

toks_win_compw1 <- toks_winw1 %>%
  tokens_compound(pattern = phrase(comp_winning)) %>%
  tokens_remove(
    pattern = c(stopwords("de", source = "marimo"), delete),
    valuetype = "fixed",
    padding = TRUE
  )

# 5. text matrix and delete white space: features counted more than 5000 times
# are trimmed, which is meant to remove white space (or tab)
# NOTE(review): this only drops the pad feature if it occurs more than 5000
# times in this subset -- confirm.

dfmat_winw1 <- dfm_trim(
  dfm(toks_win_compw1),
  max_termfreq = 5000,
  termfreq_type = "count"
)

```

#### Wave 1 Losing groups

```{r}

# 1. remove short answers: keep wave 1 rows whose answer has at least 3 characters

df_p <- mutate(df_p, nchar_lose = nchar(rg_losing))

df_p_keeplosew1 <- filter(df_p, wave == "w1", nchar_lose >= 3)

# 2. corpus, tokenization, lower case

corp_rglosew1 <- corpus(df_p_keeplosew1, text_field = "rg_losing")

# tokens

toks_losew1 <- tokens_tolower(
  tokens(corp_rglosew1, remove_punct = TRUE, remove_symbols = TRUE, padding = FALSE)
)

# 3. compound those expressions that have meaning, then
# 4. remove stopwords and other terms to delete, leaving a pad in their place

toks_lose_compw1 <- toks_losew1 %>%
  tokens_compound(pattern = phrase(comp_losing)) %>%
  tokens_remove(
    pattern = c(stopwords("de", source = "marimo"), delete),
    valuetype = "fixed",
    padding = TRUE
  )

# 5. text matrix and delete white space: features counted more than 5000 times
# are trimmed, which is meant to remove white space (or tab)
# NOTE(review): this only drops the pad feature if it occurs more than 5000
# times in this subset -- confirm.

dfmat_losew1 <- dfm_trim(
  dfm(toks_lose_compw1),
  max_termfreq = 5000,
  termfreq_type = "count"
)

```


#### Wave 1 High groups

```{r}
# 1. remove short answers: keep wave 1 rows whose answer has at least 3 characters

df_p <- mutate(df_p, nchar_high = nchar(rg_high))

df_p_keephighw1 <- filter(df_p, wave == "w1", nchar_high >= 3)

# 2. corpus, tokenization, lower case

corp_rghighw1 <- corpus(df_p_keephighw1, text_field = "rg_high")

# tokens

toks_highw1 <- tokens_tolower(
  tokens(corp_rghighw1, remove_punct = TRUE, remove_symbols = TRUE, padding = FALSE)
)

# 3. compound those expressions that have meaning, then
# 4. remove stopwords and other terms to delete (plus three terms specific to
#    this question), leaving a pad in their place

toks_high_compw1 <- toks_highw1 %>%
  tokens_compound(pattern = phrase(comp_high)) %>%
  tokens_remove(
    pattern = c(stopwords("de", source = "marimo"), delete, "rand", "oben", "oberen"),
    valuetype = "fixed",
    padding = TRUE
  )

# 5. text matrix and delete white space: features counted more than 5000 times
# are trimmed, which is meant to remove white space (or tab)
# NOTE(review): this only drops the pad feature if it occurs more than 5000
# times in this subset -- confirm.

dfmat_highw1 <- dfm_trim(
  dfm(toks_high_compw1),
  max_termfreq = 5000,
  termfreq_type = "count"
)
```


#### Wave 1 Low groups

```{r}

# 1. remove short answers: keep wave 1 rows whose answer has at least 3 characters

df_p <- mutate(df_p, nchar_low = nchar(rg_low))

df_p_keeploww1 <- filter(df_p, wave == "w1", nchar_low >= 3)

# 2. corpus, tokenization, lower case

corp_rgloww1 <- corpus(df_p_keeploww1, text_field = "rg_low")

# tokens

toks_loww1 <- tokens_tolower(
  tokens(corp_rgloww1, remove_punct = TRUE, remove_symbols = TRUE, padding = FALSE)
)

# 3. compound, then
# 4. remove stopwords and other terms to delete (plus three terms specific to
#    this question), leaving a pad in their place

toks_low_compw1 <- toks_loww1 %>%
  tokens_compound(pattern = phrase(comp_low)) %>%
  tokens_remove(
    pattern = c(stopwords("de", source = "marimo"), delete, "rand", "unteren", "oberen"),
    valuetype = "fixed",
    padding = TRUE
  )

# 5. text matrix and delete white space: features counted more than 5000 times
# are trimmed, which is meant to remove white space (or tab)
# NOTE(review): this only drops the pad feature if it occurs more than 5000
# times in this subset -- confirm.

dfmat_loww1 <- dfm_trim(
  dfm(toks_low_compw1),
  max_termfreq = 5000,
  termfreq_type = "count"
)
```



```{r, fig.height=10, fig.width=8}

# HIGH: the 20 most frequent terms, categorized via the dictionary

high_dimw1 <- textstat_frequency(dfmat_highw1, n = 20)

# English translations, written at positions 1:20 of the frequency-ordered axis
translations_high <- c("to earn", "the wealthy", "to work", "millionaires", "egoistic", "the rich", "wealthy", "heirs", "wealth", "academics", "arrogant", "power", "doctors", "entrepreneurs", "a lot of money", "managers", "rich", "money", "the rich", "politicians")

high_dimw1 <- left_join(high_dimw1, dict_df, by = "feature")

plot_high_fw1 <- ggplot(
  high_dimw1,
  aes(x = reorder(feature, frequency), y = frequency, fill = dimension)
) +
  # bar outlines use linewidth; size for line widths is deprecated in ggplot2 >= 3.4.0
  geom_col(width = 0.7, linewidth = 0.2, color = "black") +
  # translations right-aligned at the upper end of the frequency axis
  annotate(
    geom = "text", x = 1:20, y = 410, size = 2.5, color = "gray25",
    label = translations_high, hjust = 1
  ) +
  coord_flip() +
  scale_y_continuous(limits = c(0, 410)) +
  scale_fill_manual(
    values = c("gray1", "gray40", "gray72"),
    na.value = "gray99",
    name = NULL,
    # terms without a dictionary dimension get no legend entry
    na.translate = FALSE
  ) +
  labs(x = NULL, y = "Frequency", subtitle = "Wave 1 - Static: high") +
  theme(
    legend.position = "bottom",
    axis.text.y = element_text(size = 8)
  )

# LOW: the 20 most frequent terms, categorized via the dictionary

low_dimw1 <- textstat_frequency(dfmat_loww1, n = 20)

# English translations, written at positions 1:20 of the frequency-ordered axis
translations_low <- c("work", "migration background",  "state", "poor", "money", "welfare recipients", "low-income earners", "income", "to work", "education", "hartz 4", "the sick", "single parents", "recipients", "the poor", "migrants", "homeless", "workers", "pensioners", "the unemployed")

low_dimw1 <- left_join(low_dimw1, dict_df, by = "feature")

plot_low_fw1 <- ggplot(
  low_dimw1,
  aes(x = reorder(feature, frequency), y = frequency, fill = dimension)
) +
  geom_col(width = 0.7, linewidth = 0.2, color = "black") +
  annotate(
    geom = "text", x = 1:20, y = 460, size = 2.5, color = "gray25",
    label = translations_low, hjust = 1
  ) +
  coord_flip() +
  scale_fill_manual(
    values = c("gray1", "gray40", "gray72"),
    na.value = "gray99",
    name = NULL,
    na.translate = FALSE
  ) +
  labs(x = NULL, y = "Frequency", subtitle = "Wave 1 - Static: low") +
  theme(
    legend.position = "bottom",
    axis.text.y = element_text(size = 8)
  )


```


```{r, fig.height=10, fig.width=8}

# Winning: the 20 most frequent terms, categorized via the dictionary

win_dimw1 <- textstat_frequency(dfmat_winw1, n = 20)

# English translations; reversed when plotted, so the last entry lands at
# position 1 of the frequency-ordered axis
translations_win <- c("women", "craftsmen", "money", "the rich", "to work", "influencers","no idea", "doctors",  "academics", "politicians", "graduates", "homosexuals", "time", "occupations", "work", "children", "migrants", "a lot of money", "social", "workers")

win_dimw1 <- left_join(win_dimw1, dict_df, by = "feature")

plot_win_fw1 <- ggplot(
  win_dimw1,
  aes(x = reorder(feature, frequency), y = frequency, fill = dimension)
) +
  # bar outlines use linewidth; size for line widths is deprecated in ggplot2 >= 3.4.0
  geom_col(width = 0.7, linewidth = 0.2, color = "black") +
  # translations right-aligned at the upper end of the frequency axis
  annotate(
    geom = "text", x = 1:20, y = 290, size = 2.5, color = "gray25",
    label = rev(translations_win), hjust = 1
  ) +
  coord_flip() +
  scale_y_continuous(limits = c(0, 290)) +
  scale_fill_manual(
    values = c("gray1", "gray40", "gray72"),
    na.value = "gray99",
    name = NULL,
    # terms without a dictionary dimension get no legend entry
    na.translate = FALSE
  ) +
  labs(x = NULL, y = "Frequency", subtitle = "Wave 1 - Dynamic: winning") +
  theme(
    legend.position = "bottom",
    axis.text.y = element_text(size = 8)
  )

# Losing: the 20 most frequent terms, categorized via the dictionary

lose_dimw1 <- textstat_frequency(dfmat_losew1, n = 20)

# English translations, written at positions 1:20 of the frequency-ordered axis
translations_lose <- c("foreigners", "doctors", "education", "migrants", "the poor", "money", "don't know", "occupations", "civil servants", "the unemployed", "the elderly",  "pensioners", "policemen",  "work", "workers", "to work", "teachers", "police", "craftsmen", "politicians")

lose_dimw1 <- left_join(lose_dimw1, dict_df, by = "feature")

plot_lose_fw1 <- ggplot(
  lose_dimw1,
  aes(x = reorder(feature, frequency), y = frequency, fill = dimension)
) +
  geom_col(width = 0.7, linewidth = 0.2, color = "black") +
  annotate(
    geom = "text", x = 1:20, y = 400, size = 2.5, color = "gray25",
    label = translations_lose, hjust = 1
  ) +
  coord_flip() +
  scale_fill_manual(
    values = c("gray1", "gray40", "gray72"),
    na.value = "gray99",
    name = NULL,
    na.translate = FALSE
  ) +
  labs(x = NULL, y = "Frequency", subtitle = "Wave 1 - Dynamic: losing") +
  theme(
    legend.position = "bottom",
    axis.text.y = element_text(size = 8)
  )

```


#### Wave 2 Winning groups

```{r}

# 1. remove short answers: keep wave 2 rows whose answer has at least 3 characters

df_p <- mutate(df_p, nchar_win = nchar(rg_winning))

df_p_keepwinw2 <- filter(df_p, wave == "w2", nchar_win >= 3)

# 2. corpus, tokenization, lower case

corp_rgwinw2 <- corpus(df_p_keepwinw2, text_field = "rg_winning")

toks_winw2 <- tokens_tolower(
  tokens(corp_rgwinw2, remove_punct = TRUE, remove_symbols = TRUE, padding = FALSE)
)

# 3. compound those expressions that have meaning (see above), then
# 4. remove stopwords and other terms to delete, leaving a pad in their place

toks_win_compw2 <- toks_winw2 %>%
  tokens_compound(pattern = phrase(comp_winning)) %>%
  tokens_remove(
    pattern = c(stopwords("de", source = "marimo"), delete),
    valuetype = "fixed",
    padding = TRUE
  )

# 5. text matrix and delete white space: features counted more than 5000 times
# are trimmed, which is meant to remove white space (or tab)
# NOTE(review): this only drops the pad feature if it occurs more than 5000
# times in this subset -- confirm.

dfmat_winw2 <- dfm_trim(
  dfm(toks_win_compw2),
  max_termfreq = 5000,
  termfreq_type = "count"
)

```

#### Wave 2 Losing groups

```{r}

# 1. remove short answers: keep wave 2 rows whose answer has at least 3 characters

df_p <- mutate(df_p, nchar_lose = nchar(rg_losing))

df_p_keeplosew2 <- filter(df_p, wave == "w2", nchar_lose >= 3)

# 2. corpus, tokenization, lower case

corp_rglosew2 <- corpus(df_p_keeplosew2, text_field = "rg_losing")

# tokens

toks_losew2 <- tokens_tolower(
  tokens(corp_rglosew2, remove_punct = TRUE, remove_symbols = TRUE, padding = FALSE)
)

# 3. compound those expressions that have meaning, then
# 4. remove stopwords and other terms to delete, leaving a pad in their place

toks_lose_compw2 <- toks_losew2 %>%
  tokens_compound(pattern = phrase(comp_losing)) %>%
  tokens_remove(
    pattern = c(stopwords("de", source = "marimo"), delete),
    valuetype = "fixed",
    padding = TRUE
  )

# 5. text matrix and delete white space: features counted more than 5000 times
# are trimmed, which is meant to remove white space (or tab)
# NOTE(review): this only drops the pad feature if it occurs more than 5000
# times in this subset -- confirm.

dfmat_losew2 <- dfm_trim(
  dfm(toks_lose_compw2),
  max_termfreq = 5000,
  termfreq_type = "count"
)

```


#### Wave 2 High groups

```{r}
# 1. remove short answers: keep wave 2 rows whose answer has at least 3 characters

df_p <- mutate(df_p, nchar_high = nchar(rg_high))

df_p_keephighw2 <- filter(df_p, wave == "w2", nchar_high >= 3)

# 2. corpus, tokenization, lower case

corp_rghighw2 <- corpus(df_p_keephighw2, text_field = "rg_high")

# tokens

toks_highw2 <- tokens_tolower(
  tokens(corp_rghighw2, remove_punct = TRUE, remove_symbols = TRUE, padding = FALSE)
)

# 3. compound those expressions that have meaning, then
# 4. remove stopwords and other terms to delete (plus three terms specific to
#    this question), leaving a pad in their place

toks_high_compw2 <- toks_highw2 %>%
  tokens_compound(pattern = phrase(comp_high)) %>%
  tokens_remove(
    pattern = c(stopwords("de", source = "marimo"), delete, "rand", "oben", "oberen"),
    valuetype = "fixed",
    padding = TRUE
  )

# 5. text matrix and delete white space: features counted more than 5000 times
# are trimmed, which is meant to remove white space (or tab)
# NOTE(review): this only drops the pad feature if it occurs more than 5000
# times in this subset -- confirm.

dfmat_highw2 <- dfm_trim(
  dfm(toks_high_compw2),
  max_termfreq = 5000,
  termfreq_type = "count"
)
```


#### Wave 2 Low groups

```{r}

# 1. remove short answers: keep wave 2 rows whose answer has at least 3 characters

df_p <- mutate(df_p, nchar_low = nchar(rg_low))

df_p_keeploww2 <- filter(df_p, wave == "w2", nchar_low >= 3)

# 2. corpus, tokenization, lower case

corp_rgloww2 <- corpus(df_p_keeploww2, text_field = "rg_low")

# tokens

toks_loww2 <- tokens_tolower(
  tokens(corp_rgloww2, remove_punct = TRUE, remove_symbols = TRUE, padding = FALSE)
)

# 3. compound, then
# 4. remove stopwords and other terms to delete (plus three terms specific to
#    this question), leaving a pad in their place

toks_low_compw2 <- toks_loww2 %>%
  tokens_compound(pattern = phrase(comp_low)) %>%
  tokens_remove(
    pattern = c(stopwords("de", source = "marimo"), delete, "rand", "unteren", "oberen"),
    valuetype = "fixed",
    padding = TRUE
  )

# 5. text matrix and delete white space: features counted more than 5000 times
# are trimmed, which is meant to remove white space (or tab)
# NOTE(review): this only drops the pad feature if it occurs more than 5000
# times in this subset -- confirm.

dfmat_loww2 <- dfm_trim(
  dfm(toks_low_compw2),
  max_termfreq = 5000,
  termfreq_type = "count"
)
```



```{r, fig.height=10, fig.width=8}

# HIGH: the 20 most frequent terms, categorized via the dictionary

high_dim2 <- textstat_frequency(dfmat_highw2, n = 20)

# English translations, written at positions 1:20 of the frequency-ordered axis
translations_high <- c("academics", "managers", "education", "don't know", "influence", "no idea", "entrepreneurs", "wealthy", "wealth", "to work", "egoistic", "aloof", "arrogant", "doctors", "rich", "power", "the rich", "a lot of money", "politicians", "money")

high_dim2 <- left_join(high_dim2, dict_df, by = "feature")

plot_high_fw2 <- ggplot(
  high_dim2,
  aes(x = reorder(feature, frequency), y = frequency, fill = dimension)
) +
  # bar outlines use linewidth; size for line widths is deprecated in ggplot2 >= 3.4.0
  geom_col(width = 0.7, linewidth = 0.2, color = "black") +
  # translations right-aligned at the upper end of the frequency axis
  annotate(
    geom = "text", x = 1:20, y = 410, size = 2.5, color = "gray25",
    label = translations_high, hjust = 1
  ) +
  coord_flip() +
  scale_y_continuous(limits = c(0, 410)) +
  scale_fill_manual(
    values = c("gray1", "gray40", "gray72"),
    na.value = "gray99",
    name = NULL,
    # terms without a dictionary dimension get no legend entry
    na.translate = FALSE
  ) +
  labs(x = NULL, y = "Frequency", subtitle = "Wave 2 - Static: high") +
  theme(
    legend.position = "bottom",
    axis.text.y = element_text(size = 8)
  )

# LOW: the 20 most frequent terms, categorized via the dictionary

low_dim2 <- textstat_frequency(dfmat_loww2, n = 20)

# English translations, written at positions 1:20 of the frequency-ordered axis
translations_low <- c("unemployed", "to afford", "migration background", "craftsmen", "single parents", "hartz 4", "work", "lazy",  "state", "welfare recipients", "workers",  "poor","to work", "money", "the poor", "recipients", "migrants", "homeless", "pensioners", "the unemployed")

low_dim2 <- left_join(low_dim2, dict_df, by = "feature")

plot_low_fw2 <- ggplot(
  low_dim2,
  aes(x = reorder(feature, frequency), y = frequency, fill = dimension)
) +
  geom_col(width = 0.7, linewidth = 0.2, color = "black") +
  annotate(
    geom = "text", x = 1:20, y = 460, size = 2.5, color = "gray25",
    label = translations_low, hjust = 1
  ) +
  coord_flip() +
  scale_fill_manual(
    values = c(
      "gray1", # "gray40",
      "gray72"
    ),
    na.value = "gray99",
    name = NULL,
    na.translate = FALSE
  ) +
  labs(x = NULL, y = "Frequency", subtitle = "Wave 2 - Static: low") +
  theme(
    legend.position = "bottom",
    axis.text.y = element_text(size = 8)
  )


```


```{r, fig.height=10, fig.width=8}

# Winning: the 20 most frequent terms, categorized via the dictionary

win_dimw2 <- textstat_frequency(dfmat_winw2, n = 20)

# English translations; reversed when plotted, so the last entry lands at
# position 1 of the frequency-ordered axis
translations_win <- c("women", "craftsmen", "money", "influencers", "the rich", "to work", "doctors", "homosexuals", "don't know", "migrants", "no idea", "work","academics", "to earn", "politicians", "to afford", "education", "foreigners", "children", "family")

win_dimw2 <- left_join(win_dimw2, dict_df, by = "feature")

plot_win_fw2 <- ggplot(
  win_dimw2,
  aes(x = reorder(feature, frequency), y = frequency, fill = dimension)
) +
  # bar outlines use linewidth; size for line widths is deprecated in ggplot2 >= 3.4.0
  geom_col(width = 0.7, linewidth = 0.2, color = "black") +
  # translations right-aligned at the upper end of the frequency axis
  annotate(
    geom = "text", x = 1:20, y = 290, size = 2.5, color = "gray25",
    label = rev(translations_win), hjust = 1
  ) +
  coord_flip() +
  scale_y_continuous(limits = c(0, 290)) +
  scale_fill_manual(
    values = c("gray1", "gray40", "gray72"),
    na.value = "gray99",
    name = NULL,
    # terms without a dictionary dimension get no legend entry
    na.translate = FALSE
  ) +
  labs(x = NULL, y = "Frequency", subtitle = "Wave 2 - Dynamic: winning") +
  theme(
    legend.position = "bottom",
    axis.text.y = element_text(size = 8)
  )

# Losing: the 20 most frequent terms, categorized via the dictionary

lose_dimw2 <- textstat_frequency(dfmat_losew2, n = 20)

# English translations, written at positions 1:20 of the frequency-ordered axis
translations_lose <- c("education", "occupations", "the elderly", "crafts", "no idea", "men", "middle stratum", "doctors", "the unemployed", "don't know", "pensioners", "mid tier",  "policemen", "workers", "work", "police", "to work", "teachers", "craftsmen", "politicians")

lose_dimw2 <- left_join(lose_dimw2, dict_df, by = "feature")

plot_lose_fw2 <- ggplot(
  lose_dimw2,
  aes(x = reorder(feature, frequency), y = frequency, fill = dimension)
) +
  geom_col(width = 0.7, linewidth = 0.2, color = "black") +
  annotate(
    geom = "text", x = 1:20, y = 400, size = 2.5, color = "gray25",
    label = translations_lose, hjust = 1
  ) +
  coord_flip() +
  scale_fill_manual(
    values = c("gray1", "gray40", "gray72"),
    na.value = "gray99",
    name = NULL,
    na.translate = FALSE
  ) +
  labs(x = NULL, y = "Frequency", subtitle = "Wave 2 - Dynamic: losing") +
  theme(
    legend.position = "bottom",
    axis.text.y = element_text(size = 8)
  )

```


```{r, fig.height=10, fig.width=8}
# wave 1 plot above wave 2 plot, 9 cm each; then draw the combined figure
plots_freq_high <- arrangeGrob(
  plot_high_fw1, plot_high_fw2,
  nrow = 2,
  heights = unit(c(9, 9), "cm")
)

grid.arrange(plots_freq_high)
```

```{r, eval = F, include = F}
# write the combined figure to ../plots/appendix as a 12 x 18 cm PDF
# (argument spelled out as `filename` instead of relying on partial matching)
ggsave(
  filename = "fplot_high_w1w2.pdf",
  path = "../plots/appendix",
  plot = plots_freq_high,
  width = 12, height = 18, units = "cm"
)
```


## Figure A2

```{r, fig.height=10, fig.width=8}
# wave 1 plot above wave 2 plot, 9 cm each; then draw the combined figure
plots_freq_low <- arrangeGrob(
  plot_low_fw1, plot_low_fw2,
  nrow = 2,
  heights = unit(c(9, 9), "cm")
)

grid.arrange(plots_freq_low)
```

```{r, eval = F, include = F}
# write the combined figure to ../plots/appendix as a 12 x 18 cm PDF
# (argument spelled out as `filename` instead of relying on partial matching)
ggsave(
  filename = "fplot_low_w1w2.pdf",
  path = "../plots/appendix",
  plot = plots_freq_low,
  width = 12, height = 18, units = "cm"
)
```


## Figure A3

```{r, fig.height=10, fig.width=8}
# wave 1 plot above wave 2 plot, 9 cm each; then draw the combined figure
plots_freq_win <- arrangeGrob(
  plot_win_fw1, plot_win_fw2,
  nrow = 2,
  heights = unit(c(9, 9), "cm")
)

grid.arrange(plots_freq_win)
```

```{r, eval = F, include = F}
# write the combined figure to ../plots/appendix as a 12 x 18 cm PDF
# (argument spelled out as `filename` instead of relying on partial matching)
ggsave(
  filename = "fplot_win_w1w2.pdf",
  path = "../plots/appendix",
  plot = plots_freq_win,
  width = 12, height = 18, units = "cm"
)
```


## Figure A4

```{r, fig.height=10, fig.width=8}
# wave 1 plot above wave 2 plot, 9 cm each; then draw the combined figure
plots_freq_lose <- arrangeGrob(
  plot_lose_fw1, plot_lose_fw2,
  nrow = 2,
  heights = unit(c(9, 9), "cm")
)

grid.arrange(plots_freq_lose)
```

```{r, eval = F, include = F}
# write the combined figure to ../plots/appendix as a 12 x 18 cm PDF
# (argument spelled out as `filename` instead of relying on partial matching)
ggsave(
  filename = "fplot_lose_w1w2.pdf",
  path = "../plots/appendix",
  plot = plots_freq_lose,
  width = 12, height = 18, units = "cm"
)
```


## Figure C1

```{r}
# the 20 most frequent terms in dfmat_med, categorized via the dictionary
medium_dim <- textstat_frequency(dfmat_med, n = 20)

# English translations, written at positions 1:20 of the frequency-ordered axis
translations_med <- c("job", "no idea", "satisfied", "diligent", "family", "to afford", "civil servants", "work", "working population", "families", "money", "income", "normal", "middle class", "middle class", "craftsmen", "to work", "the normal", "employees", "workers")

medium_dim <- left_join(medium_dim, dict_df, by = "feature")

plot_med_f <- ggplot(
  medium_dim,
  aes(x = reorder(feature, frequency), y = frequency, fill = dimension)
) +
  # bar outlines use linewidth; size for line widths is deprecated in ggplot2 >= 3.4.0
  geom_col(width = 0.7, linewidth = 0.2, color = "black") +
  # translations right-aligned at y = 280 on the frequency axis
  annotate(
    geom = "text", x = 1:20, y = 280, size = 2.5, color = "gray25",
    label = translations_med, hjust = 1
  ) +
  coord_flip() +
  scale_fill_manual(
    values = c("gray1", "gray40", "gray72"),
    na.value = "gray99",
    name = NULL,
    # terms without a dictionary dimension get no legend entry
    na.translate = FALSE
  ) +
  labs(x = NULL, y = "Frequency", subtitle = "Static: medium") +
  theme(
    legend.position = "bottom",
    axis.text.y = element_text(size = 8)
  )

plot_med_f
```

## Table C1

```{r}
# Table C1: for every Question, count (N) and share (%) of answers flagged for
# each dictionary dimension, then reshape to one row per Dimension x Question.
df_dyn_stat %>%
  group_by(Question) %>%
  summarise(
    "Socioeconomic.N" = sum(eco == 1),
    "Socioeconomic.%" = mean(eco) * 100,
    "Sociocultural.N" = sum(cult == 1),
    "Sociocultural.%" = mean(cult) * 100,
    "Education.N" = sum(edu == 1),
    "Education.%" = mean(edu) * 100,
    "Politics.N" = sum(pol == 1),
    "Politics.%" = mean(pol) * 100,
    "Attributes.N" = sum(attr == 1),
    "Attributes.%" = mean(attr) * 100,
    "Dontknow.N" = sum(dk == 1),
    "Dontknow.%" = mean(dk) * 100,
    "Total.N" = n(),
    # NOTE(review): this averages the per-answer SUM of the five flags (dk is
    # left out), so it can exceed 100 when answers hit several dimensions —
    # confirm this is the intended "Total".
    "Total.%" = mean(eco + cult + edu + pol + attr) * 100
  ) %>%
  ungroup() %>%
  # Column names are "<Dimension>.<N|%>"; split on the dot so N and % become
  # two value columns.
  pivot_longer(
    cols = -Question,
    names_to = c("Dimension", ".value"),
    names_sep = "\\."
  ) %>%
  arrange(
    fct_relevel(
      factor(Dimension),
      "Socioeconomic", "Sociocultural", "Education",
      "Politics", "Attributes", "Dontknow", "Total"
    ),
    desc(Question)
  ) %>%
  relocate(Dimension) %>%
  kbl(
    caption = "Salience of dictionary dimensions (percentage of answers mentioning the dimension by question type)",
    booktabs = TRUE,
    # format = "latex",
    digits = 1
  ) %>%
  collapse_rows(columns = 1:2, latex_hline = "major") %>%
  kable_styling()
```

## Table C2

```{r, cache = T}
# Table C2, step 1: per answer (row), add up all columns whose names start with
# a given dictionary prefix, then reduce each sum to a 0/1 "mentioned" flag.
df_subd <- df_dyn_stat %>%
  group_by(Question) %>%
  rowwise() %>%
  mutate(
    # row-wise sums over the matching dictionary columns (NAs ignored)
    sum_ecoocc = sum(c_across(starts_with("socioeconomic.occ")), na.rm = TRUE),
    sum_ecowealth = sum(c_across(starts_with("socioeconomic.wealth")), na.rm = TRUE),
    sum_cultgen = sum(c_across(starts_with("sociocultural.gender")), na.rm = TRUE),
    sum_cultsex = sum(c_across(starts_with("sociocultural.sexuality")), na.rm = TRUE),
    sum_cultrace = sum(c_across(starts_with("sociocultural.race")), na.rm = TRUE),
    sum_edu = sum(c_across(starts_with("education")), na.rm = TRUE),
    # indicators: 1 if the sum is positive, 0 otherwise
    ecoocc = if_else(sum_ecoocc > 0, 1, 0),
    ecowealth = if_else(sum_ecowealth > 0, 1, 0),
    cultgen = if_else(sum_cultgen > 0, 1, 0),
    cultsex = if_else(sum_cultsex > 0, 1, 0),
    cultrace = if_else(sum_cultrace > 0, 1, 0),
    edu = if_else(sum_edu > 0, 1, 0)
  ) %>%
  ungroup()


# Table C2, step 2: N and percentage of answers per subdimension and Question.
# Column names follow "<Dimension>.<Subdimension>.<N|pct>" so they can be split
# into three parts when reshaping.
df_subd %>%
  group_by(Question) %>%
  summarise(
    "Socioeconomic.Occupation.N" = sum(ecoocc == 1),
    "Socioeconomic.Occupation.pct" = mean(ecoocc) * 100,
    "Socioeconomic.Wealth.N" = sum(ecowealth == 1),
    "Socioeconomic.Wealth.pct" = mean(ecowealth) * 100,
    "Sociocultural.Gender.N" = sum(cultgen == 1),
    "Sociocultural.Gender.pct" = mean(cultgen) * 100,
    "Sociocultural.Sexuality.N" = sum(cultsex == 1),
    "Sociocultural.Sexuality.pct" = mean(cultsex) * 100,
    "Sociocultural.Race.N" = sum(cultrace == 1),
    "Sociocultural.Race.pct" = mean(cultrace) * 100,
    "Education.Education.N" = sum(edu == 1),
    "Education.Education.pct" = mean(edu) * 100,
    "Total.Total.N" = n(),
    "Total.Total.pct" = 100
  ) %>%
  ungroup() %>%
  pivot_longer(
    cols = -Question,
    names_to = c("Dimension", "Subdimension", ".value"),
    names_sep = "\\."
  ) %>%
  arrange(
    desc(Question),
    fct_relevel(
      factor(Dimension),
      "Socioeconomic", "Sociocultural", "Education", "Total"
    )
  ) %>%
  kbl(
    caption = "Salience of dictionary subdimensions by question type (percentage of answers mentioning the dimension)",
    booktabs = TRUE,
    # format = "latex",
    digits = 1
  ) %>%
  collapse_rows(columns = 1:2, latex_hline = "major") %>%
  kable_styling()
```

## Figure C2

```{r}
# Figure C2: logit models of the `socioeconomic` flag on galtan_group plus
# controls, fitted separately on the losing- and winning-question data.
model_soceconl3 <- glm(
  socioeconomic ~ galtan_group + att_socialben + age + gender_fm + edu +
    migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_losedict
)

model_soceconw3 <- glm(
  socioeconomic ~ galtan_group + att_socialben + age + gender_fm + edu +
    migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_windict
)

# Average predictions per galtan_group, tagged with the question they belong to.
pred_sel <- avg_predictions(model_soceconl3, by = "galtan_group") %>%
  mutate(question = "Mentioned as losing groups")

pred_sew <- avg_predictions(model_soceconw3, by = "galtan_group") %>%
  mutate(question = "Mentioned as winning groups")

# fct_rev() reverses the alphabetical level order, so "winning" comes first.
pred_econ <- bind_rows(pred_sel, pred_sew) %>%
  mutate(question = fct_rev(factor(question)))

ggplot(pred_econ, aes(question, estimate, color = galtan_group)) +
  geom_pointrange(
    aes(ymin = conf.low, ymax = conf.high),
    position = position_dodge(width = 0.5)
  ) +
  scale_color_grey(start = 0.1, end = 0.7, name = "Subgroup") +
  theme(legend.position = "bottom") +
  labs(y = "Predicted probability of mentioning groups", x = NULL)
```

## Figure C3

```{r, fig.width=10, fig.height=8}
# Figure C3: logit models of the `sociocultural` flag on continuous galtan_att,
# with ("b") and without a squared term, for losing (l) and winning (w) data.
model_soccultl2 <- glm(
  sociocultural ~ galtan_att + att_socialben + age + gender_fm + edu +
    migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_losedict
)

model_soccultl2b <- glm(
  sociocultural ~ galtan_att + I(galtan_att^2) + att_socialben + age +
    gender_fm + edu + migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_losedict
)

model_soccultw2 <- glm(
  sociocultural ~ galtan_att + att_socialben + age + gender_fm + edu +
    migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_windict
)

model_soccultw2b <- glm(
  sociocultural ~ galtan_att + I(galtan_att^2) + att_socialben + age +
    gender_fm + edu + migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_windict
)

# Axis titles and y-range shared by all four panels.
c3_x_label <- "GAL-TAN attitudes"
c3_y_label <- "Pred. probability sociocultural mention"
c3_y_limits <- c(0.04, 0.27)

# Left column (winning) keeps its y-axis title; right column (losing) drops it.
p1 <- plot_predictions(model_soccultw2, condition = c("galtan_att")) +
  labs(
    subtitle = "Continuous GAL-TAN: Winning groups",
    x = c3_x_label, y = c3_y_label
  ) +
  scale_y_continuous(limits = c3_y_limits)

p2 <- plot_predictions(model_soccultl2, condition = c("galtan_att")) +
  labs(
    subtitle = "Continuous GAL-TAN: Losing groups",
    x = c3_x_label, y = c3_y_label
  ) +
  theme(axis.title.y = element_blank()) +
  scale_y_continuous(limits = c3_y_limits)

p3 <- plot_predictions(model_soccultw2b, condition = c("galtan_att")) +
  labs(
    subtitle = "Polynomial: Winning groups",
    x = c3_x_label, y = c3_y_label
  ) +
  scale_y_continuous(limits = c3_y_limits)

p4 <- plot_predictions(model_soccultl2b, condition = c("galtan_att")) +
  labs(
    subtitle = "Polynomial: Losing groups",
    x = c3_x_label, y = c3_y_label
  ) +
  theme(axis.title.y = element_blank()) +
  scale_y_continuous(limits = c3_y_limits)

# 2 x 2 layout; the first column is slightly wider (10 cm vs 9.5 cm).
plots_galtan <- arrangeGrob(
  grobs = list(p1, p2, p3, p4),
  ncol = 2, nrow = 2,
  widths = unit(c(10, 9.5), "cm")
)

grid.arrange(plots_galtan)

```

## Figure C4

```{r, fig.width=8}
# Figure C4: average predictions by migrationb from six previously fitted
# models, each tagged with the question and the outcome it refers to.

# overall sociocultural
pred_scl <- avg_predictions(model_soccultl3, by = "migrationb") %>%
  mutate(
    question = "Mentioned as losing groups",
    outcome = "overall\nsociocultural"
  )
pred_scw <- avg_predictions(model_soccultw3, by = "migrationb") %>%
  mutate(
    question = "Mentioned as winning groups",
    outcome = "overall\nsociocultural"
  )

# race
pred_scrl <- avg_predictions(model_racel3, by = "migrationb") %>%
  mutate(
    question = "Mentioned as losing groups",
    outcome = "race"
  )
pred_scrw <- avg_predictions(model_racew3, by = "migrationb") %>%
  mutate(
    question = "Mentioned as winning groups",
    outcome = "race"
  )

# gender and sexuality
pred_scgl <- avg_predictions(model_gsl3, by = "migrationb") %>%
  mutate(
    question = "Mentioned as losing groups",
    outcome = "gender and\nsexuality"
  )
pred_scgw <- avg_predictions(model_gsw3, by = "migrationb") %>%
  mutate(
    question = "Mentioned as winning groups",
    outcome = "gender and\nsexuality"
  )

# Stack all six tables; fix the outcome order on the x-axis and reverse the
# question levels so the "winning" facet comes first.
pred_cult <- bind_rows(
  pred_scl, pred_scw,
  pred_scrl, pred_scrw,
  pred_scgl, pred_scgw
) %>%
  mutate(
    outcome = fct_relevel(
      factor(outcome),
      "overall\nsociocultural", "race", "gender and\nsexuality"
    ),
    question = fct_rev(factor(question))
  )

ggplot(pred_cult, aes(outcome, estimate, color = migrationb)) +
  geom_pointrange(
    aes(ymin = conf.low, ymax = conf.high),
    position = position_dodge(width = 0.5)
  ) +
  facet_wrap(~question) +
  scale_color_grey(start = 0, end = 0.7, name = "Migration background") +
  theme(legend.position = "bottom") +
  labs(y = "Predicted probability of mentioning groups", x = NULL)
```


## Figure C5

```{r}

# Figure C5: logit models with an age_group x gender_fm interaction for three
# outcomes (sociocultural, soccult_race, soccult_gendersex), each fitted on the
# losing (l) and winning (w) data.
model_soccultl7 <- glm(
  sociocultural ~ galtan_group + att_socialben + age_group * gender_fm + edu +
    migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_losedict
)

model_soccultw7 <- glm(
  sociocultural ~ galtan_group + att_socialben + age_group * gender_fm + edu +
    migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_windict
)

model_racel7 <- glm(
  soccult_race ~ galtan_group + att_socialben + age_group * gender_fm + edu +
    migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_losedict
)

model_racew7 <- glm(
  soccult_race ~ galtan_group + att_socialben + age_group * gender_fm + edu +
    migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_windict
)

model_gsl7 <- glm(
  soccult_gendersex ~ galtan_group + att_socialben + age_group * gender_fm +
    edu + migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_losedict
)

model_gsw7 <- glm(
  soccult_gendersex ~ galtan_group + att_socialben + age_group * gender_fm +
    edu + migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_windict
)


# Grey colour scale and common y-range applied to every panel.
c5_scales <- list(
  scale_color_grey(start = 0, end = 0.7),
  scale_y_continuous(limits = c(0, 0.24))
)

# Sociocultural overall (top row; carries the column titles)

a1 <- plot_predictions(model_soccultw7, by = c("age_group", "gender_fm")) +
  labs(
    title = "Winning groups", subtitle = "Sociocultural overall",
    x = NULL, y = NULL
  ) +
  theme(legend.position = "none") +
  c5_scales

a2 <- plot_predictions(model_soccultl7, by = c("age_group", "gender_fm")) +
  labs(
    title = "Losing groups", subtitle = "Sociocultural overall",
    x = NULL, y = NULL
  ) +
  theme(legend.position = "none") +
  c5_scales

# Race (middle row; the left panel carries the only y-axis title)

a3 <- plot_predictions(model_racew7, by = c("age_group", "gender_fm")) +
  labs(
    subtitle = "Race",
    x = NULL, y = "Predicted probability of mentioning"
  ) +
  theme(legend.position = "none") +
  c5_scales

a4 <- plot_predictions(model_racel7, by = c("age_group", "gender_fm")) +
  labs(
    subtitle = "Race",
    x = NULL, y = NULL
  ) +
  theme(legend.position = "none") +
  c5_scales

# Gender and sexuality (bottom row; carries the x-axis title and the legend)

a5 <- plot_predictions(model_gsw7, by = c("age_group", "gender_fm")) +
  labs(
    subtitle = "Gender/sexuality", color = NULL,
    x = "Age", y = NULL
  ) +
  theme(legend.position = "bottom") +
  c5_scales

a6 <- plot_predictions(model_gsl7, by = c("age_group", "gender_fm")) +
  labs(
    subtitle = "Gender/sexuality", color = NULL,
    x = "Age", y = NULL
  ) +
  theme(legend.position = "bottom") +
  c5_scales

```

```{r, fig.height=10, fig.width=10}
# Arrange the six panels a1..a6 in two 9 cm columns; the three rows are
# 8, 7 and 9 cm high.
plotsag <- arrangeGrob(
  grobs = list(a1, a2, a3, a4, a5, a6),
  ncol = 2,
  widths = unit(c(9, 9), "cm"),
  heights = unit(c(8, 7, 9), "cm")
)

grid.arrange(plotsag)
```

## Figure C6

```{r, fig.height=8, fig.width=10}

# Figure C6: keyness (chi2) of features in dfmat_win, grouped by its
# gender_age_contrast docvar, with "young women" as the target group.
tstat_key_win_gage <- textstat_keyness(
  dfm_group(dfmat_win, groups = dfmat_win$gender_age_contrast),
  target = "young women"
) %>%
  left_join(dict_df, by = c("feature")) %>%
  mutate(
    # non-negative chi2 = keyed to the target group, negative = reference group
    targetgroup = if_else(chi2 >= 0, "target", "reference"),
    chi_abs = abs(chi2)
  ) %>%
  # keep the 20 strongest features on each side, then drop undefined chi2
  group_by(targetgroup) %>%
  slice_max(chi_abs, n = 20, with_ties = FALSE) %>%
  filter(!is.nan(chi2))

# English axis labels, one per plotted feature.
# NOTE(review): assumed to follow the bar order given by reorder(feature, chi2)
# — re-check if the data or the number of retained features change.
translations_wingage <- c(
  "athletes", "demeanor", "middle class", "craftsmen", "happier",
  "IT employees", "nursery school teachers", "feminists", "higher", "social",
  "computer scientists", "caregivers", "to perform", "thinkers",
  "digitalization", "brought", "belong", "overall", "IT specialists",
  "artists", "yet", "achieved", "societal", "social media", "active", "simple",
  "focus", "to fight", "white people", "caregivers", "family", "job",
  "tolerant", "good", "a lot of money", "to work", "general", "mothers", "men",
  "women"
)

ggplot(
  data = tstat_key_win_gage,
  aes(
    x = reorder(feature, chi2), y = chi2,
    fill = dimension, label = feature
  )
) +
  geom_col(width = 0.7, linewidth = 0.2, color = "black") +
  # original feature printed just beyond the end of each bar
  geom_text(
    color = "gray25", hjust = "outward",
    position = position_nudge_center(y = 0.4, direction = "split")
  ) +
  coord_flip() +
  scale_x_discrete(labels = translations_wingage) +
  scale_y_continuous(limits = c(-10, 10)) +
  scale_fill_manual(
    values = c(
      "gray1",
      # "gray40",
      "gray72"
    ),
    na.value = "gray99",
    name = NULL, na.translate = FALSE
  ) +
  labs(x = NULL, y = "Chi2") +
  theme(
    legend.position = "bottom",
    axis.text.y = element_text(size = 8)
  ) +
  # group captions placed below and above the bars; expand_limits() makes room
  annotate(
    geom = "text", x = c(-1, 42), y = c(-5, 5), size = 4, color = "black",
    label = c("young men", "young women")
  ) +
  expand_limits(x = c(-2, 43))
```

## Table C3

```{r}
# Table C3: coefficient map (model term -> printed label) passed to
# modelsummary() as coef_map.
cm <- c(
  "poc_win"                         = "Mentioned migrants as winners",
  "männer_lose"                     = "Mentioned men as losers",
  "frauen_win"                      = "Mentioned women as winners",
  "galtan_groupsocial conservative" = "Social conservative group",
  "econlr_groupecon. right"         = "Econ. right group",
  "galtan_att"                      = "TAN attitudes (cont.)",
  "I(galtan_att^2)"                 = "TAN attitudes^2",
  "poc_win:galtan_att"              = "Migrant winners x TAN attitudes",
  "att_socialben"                   = "Anti-redistribution attitude (cont.)",
  "age"                             = "Age",
  "gender_fmwomen"                  = "Woman",
  "edumedium education"             = "Medium education (ref. low)",
  "eduhigh education"               = "High education (ref. low)",
  "inc_hh"                          = "Household-equivalized income",
  "migrationbyes"                   = "With migration background",
  "urbansuburb/small city"          = "Suburbs/small city (ref. country)",
  "urbanbig city"                   = "Big city (ref. country)"
)

# Sociocultural models shown as table columns (names are the column headers).
models <- list(
  "Winning"   = model_soccultw3,
  "Winning 2" = model_soccultw2,
  "Winning 3" = model_soccultw2b,
  "Losing"    = model_soccultl3,
  "Losing 2"  = model_soccultl2,
  "Losing 3"  = model_soccultl2b
)

modelsummary(
  models,
  coef_map = cm,
  stars = TRUE,
  # output = "latex",
  title = "Predictors of mentioning sociocultural dictionary dimension",
  notes = "Binomial generalized linear model. Standard errors in parentheses, conf. level: 0.95."
)
```

## Table C4

```{r}

# Table C4: logit models of the `soccult_race` flag on continuous galtan_att,
# with ("b") and without a squared term, for losing (l) and winning (w) data.
model_racel2 <- glm(
  soccult_race ~ galtan_att + att_socialben + age + gender_fm + edu +
    migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_losedict
)

model_racel2b <- glm(
  soccult_race ~ galtan_att + I(galtan_att^2) + att_socialben + age +
    gender_fm + edu + migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_losedict
)

model_racew2 <- glm(
  soccult_race ~ galtan_att + att_socialben + age + gender_fm + edu +
    migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_windict
)

model_racew2b <- glm(
  soccult_race ~ galtan_att + I(galtan_att^2) + att_socialben + age +
    gender_fm + edu + migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_windict
)


# Table columns (names are the column headers). The second losing model is
# labelled "Losing 2" (previously "Losing 1") so the headers follow the same
# Winning / Winning 2 / Winning 3 / Losing / Losing 2 / Losing 3 scheme as the
# other model tables in this file.
models <- list(
  "Winning"   = model_racew3,
  "Winning 2" = model_racew2,
  "Winning 3" = model_racew2b,
  "Losing"    = model_racel3,
  "Losing 2"  = model_racel2,
  "Losing 3"  = model_racel2b
)


# `cm` is the coefficient map defined for the preceding table.
modelsummary(
  models,
  coef_map = cm,
  stars = TRUE,
  # output = "latex",
  title = "Predictors of mentioning racial dictionary dimension",
  notes = "Binomial generalized linear model. Standard errors in parentheses, conf. level: 0.95."
)
```


## Table C5

```{r}

# Table C5: logit models of the `soccult_gendersex` flag on continuous
# galtan_att, with ("b") and without a squared term, for losing (l) and
# winning (w) data.
model_gsl2 <- glm(
  soccult_gendersex ~ galtan_att + att_socialben + age + gender_fm + edu +
    migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_losedict
)

model_gsl2b <- glm(
  soccult_gendersex ~ galtan_att + I(galtan_att^2) + att_socialben + age +
    gender_fm + edu + migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_losedict
)

model_gsw2 <- glm(
  soccult_gendersex ~ galtan_att + att_socialben + age + gender_fm + edu +
    migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_windict
)

model_gsw2b <- glm(
  soccult_gendersex ~ galtan_att + I(galtan_att^2) + att_socialben + age +
    gender_fm + edu + migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_windict
)

# Table columns (names are the column headers).
models <- list(
  "Winning"   = model_gsw3,
  "Winning 2" = model_gsw2,
  "Winning 3" = model_gsw2b,
  "Losing"    = model_gsl3,
  "Losing 2"  = model_gsl2,
  "Losing 3"  = model_gsl2b
)

modelsummary(
  models,
  coef_map = cm,
  stars = TRUE,
  # output = "latex",
  title = "Predictors of mentioning gender/sexuality dictionary dimensions",
  notes = "Binomial generalized linear model. Standard errors in parentheses, conf. level: 0.95."
)
```


## Table C6

```{r}
# Table C6: coefficient map (model term -> printed label) for the models that
# use the combined gender_edu factor.
cm <- c(
  "gender_eduwomen without higher ed." = "Women without higher ed.",
  "gender_edumen with higher ed."      = "Men with higher ed.",
  "gender_edumen without higher ed."   = "Men without higher ed.",
  "galtan_groupsocial conservative"    = "Social conservative group",
  "econlr_groupecon. right"            = "Econ. right group",
  "galtan_att"                         = "TAN attitudes (cont.)",
  "I(galtan_att^2)"                    = "TAN attitudes^2",
  "poc_win:galtan_att"                 = "Migrant winners x TAN attitudes",
  "att_socialben"                      = "Anti-redistribution attitude (cont.)",
  "age"                                = "Age",
  "inc_hh"                             = "Household-equivalized income",
  "migrationbyes"                      = "With migration background",
  "urbansuburb/small city"             = "Suburbs/small city (ref. country)",
  "urbanbig city"                      = "Big city (ref. country)"
)

# Nested list: two panels (Winning / Losing) with three models each, laid out
# side by side via shape = "cbind".
models <- list(
  "Winning" = list(
    "soc-cult" = model_soccultw4,
    "race"     = model_racew4,
    "gender"   = model_gsw4
  ),
  "Losing" = list(
    "soc-cult" = model_soccultl4,
    "race"     = model_racel4,
    "gender"   = model_gsl4
  )
)

modelsummary(
  models,
  coef_map = cm,
  stars = TRUE,
  shape = "cbind",
  # output = "latex",
  title = "Predictors of mentioning sociocultural dictionary dimensions, by gender and education",
  notes = "Binomial generalized linear model (reference category:  women with higher education). Standard errors in parentheses, conf. level: 0.95."
)
```


## Table C7

```{r}
# Table C7: coefficient map (model term -> printed label) for the
# socioeconomic models.
cm <- c(
  "galtan_groupsocial conservative" = "Social conservative group",
  "econlr_groupecon. right"         = "Econ. right group",
  "galtan_att"                      = "TAN attitudes (cont.)",
  "I(galtan_att^2)"                 = "TAN attitudes^2",
  "poc_win:galtan_att"              = "Migrant winners x TAN att.",
  "frauen_win:galtan_att"           = "Women winners x TAN att.",
  "att_socialben"                   = "Anti-redistribution attitude (cont.)",
  "age"                             = "Age",
  "gender_fmwomen"                  = "Woman",
  "edumedium education"             = "Medium education (ref. low)",
  "eduhigh education"               = "High education (ref. low)",
  "inc_hh"                          = "Household-equivalized income",
  "migrationbyes"                   = "With migration background",
  "urbansuburb/small city"          = "Suburbs/small city (ref. country)",
  "urbanbig city"                   = "Big city (ref. country)"
)

# Logit models of the `socioeconomic` flag on the losing-question data:
# continuous galtan_att (2), plus squared term (2b), and galtan_group (3).
model_soceconl2 <- glm(
  socioeconomic ~ galtan_att + att_socialben + age + gender_fm + edu +
    migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_losedict
)

model_soceconl2b <- glm(
  socioeconomic ~ galtan_att + I(galtan_att^2) + att_socialben + age +
    gender_fm + edu + migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_losedict
)

model_soceconl3 <- glm(
  socioeconomic ~ galtan_group + att_socialben + age + gender_fm + edu +
    migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_losedict
)

# The same three specifications on the winning-question data.
model_soceconw2 <- glm(
  socioeconomic ~ galtan_att + att_socialben + age + gender_fm + edu +
    migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_windict
)

model_soceconw2b <- glm(
  socioeconomic ~ galtan_att + I(galtan_att^2) + att_socialben + age +
    gender_fm + edu + migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_windict
)

model_soceconw3 <- glm(
  socioeconomic ~ galtan_group + att_socialben + age + gender_fm + edu +
    migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_windict
)


# Table columns (names are the column headers).
models <- list(
  "Winning"   = model_soceconw3,
  "Winning 2" = model_soceconw2,
  "Winning 3" = model_soceconw2b,
  "Losing"    = model_soceconl3,
  "Losing 2"  = model_soceconl2,
  "Losing 3"  = model_soceconl2b
)

modelsummary(
  models,
  coef_map = cm,
  stars = TRUE,
  # output = "latex",
  title = "Predictors of mentioning socioeconomic dictionary dimensions",
  notes = "Binomial generalized linear model. Standard errors in parentheses, conf. level: 0.95."
)
```


## Figure C7


```{r}

# Figure C7: dictionary of directly named groups (German glob patterns) used to
# flag answers that mention women, men, migrants/PoC or natives.
# NOTE(review): "*männer" has no trailing wildcard while "*frauen*" does, so
# inflected forms such as "männern" are presumably not matched — confirm the
# asymmetry is intended before changing it (it affects the reported results).
dict_dir <- dictionary(list(
  frauen = c("*frauen*", "frau", "feministen", "feministinnen"),
  männer = c("*männer", "mann"),
  poc = c(
    "*migrant*", "migrationshintergrund*", "ausländ*", "zuwander*",
    "flüchtling*", "kanacke*", "einwander*", "geflüchtet*", "asylant*",
    "fremd* herkunft*", "fliehen", "geflohen*", "bipoc", "bpoc", "poc"
  ),
  weiss = c(
    "deutsch*", "weiße", "weisse", "deutschland", "volk*", "einheimische*",
    "bevölkerung*", "land"
  )
))

# Build one row per document of `toks` with a boolean-weighted (0/1) column for
# each dict_dir key found, plus the document's ResponseId.
#   toks:   tokens object carrying a "ResponseId" docvar
#   suffix: string appended to every dictionary column name (e.g. "_win")
# Returns a data.frame whose dictionary columns end in `suffix`; ResponseId is
# left unsuffixed so it can serve as the join key.
flag_dict_mentions <- function(toks, suffix) {
  tokens_lookup(toks, dict_dir, valuetype = "glob") %>%
    dfm() %>%
    dfm_weight("boolean") %>%
    convert(to = "data.frame") %>%
    mutate(ResponseId = docvars(toks, "ResponseId")) %>%
    select(-doc_id) %>%
    rename_with(function(nm) paste0(nm, suffix), .cols = -ResponseId)
}

df_win_dict_dir <- flag_dict_mentions(toks_win_comp, "_win")
df_lose_dict_dir <- flag_dict_mentions(toks_lose_comp, "_lose")

# Start from the "win" flags, add the respondent data from df_p, then the
# "lose" flags; both are left joins on ResponseId, so rows follow the win table.
df_p_win_dict_dir <- left_join(df_win_dict_dir, df_p, by = "ResponseId")
df_p_dict_dir <- left_join(df_p_win_dict_dir, df_lose_dict_dir, by = "ResponseId")

# Race zero-sum test: weiss_lose on poc_win, without (3) and with (4) an
# interaction with galtan_att.
model_white3 <- glm(
  weiss_lose ~ poc_win + galtan_att + att_socialben + age + gender_fm + edu +
    migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_dict_dir
)

model_white4 <- glm(
  weiss_lose ~ poc_win * galtan_att + att_socialben + age + gender_fm + edu +
    migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_dict_dir
)

pw <- plot_predictions(model_white4, condition = c("galtan_att", "poc_win")) +
  labs(
    subtitle = "Race zero-sum test", x = "GAL-TAN attitudes",
    y = "Pred. probability to mention natives as losers",
    color = "Mentioning migrants/PoC as winners",
    fill = "Mentioning migrants/PoC as winners"
  ) +
  theme(legend.position = "bottom")

# Gender zero-sum test: männer_lose on frauen_win, without (3) and with (4) an
# interaction with galtan_att.
model_men3 <- glm(
  männer_lose ~ frauen_win + galtan_att + att_socialben + age + gender_fm +
    edu + migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_dict_dir
)

model_men4 <- glm(
  männer_lose ~ frauen_win * galtan_att + att_socialben + age + gender_fm +
    edu + migrationb + urban + inc_hh,
  family = "binomial",
  data = df_p_dict_dir
)

pm <- plot_predictions(model_men4, condition = c("galtan_att", "frauen_win")) +
  labs(
    subtitle = "Gender zero-sum test", x = "GAL-TAN attitudes",
    y = "Pred. probability to mention men as losers",
    color = "Mentioning women as winners",
    fill = "Mentioning women as winners"
  ) +
  theme(legend.position = "bottom")

```



```{r, fig.width = 5, fig.height=10}
# Stack the two zero-sum plots (pw above pm), 10 cm each, and draw them.
plotszerosum <- arrangeGrob(
  grobs = list(pw, pm),
  nrow = 2,
  heights = unit(c(10, 10), "cm")
)
grid.arrange(plotszerosum)

```

## Table C9

```{r}
# Table C9: coefficient map (model term -> printed label) for the zero-sum
# models; the order here is the row order passed to coef_map.
cm <- c(
  "poc_win"                         = "Mentioned migrants as winners",
  "poc_win:galtan_att"              = "Migrant winners x TAN att.",
  "männer_lose"                     = "Mentioned men as losers",
  "frauen_win"                      = "Mentioned women as winners",
  "frauen_win:galtan_att"           = "Women winners x TAN att.",
  "galtan_groupsocial conservative" = "Social conservative group",
  "econlr_groupecon. right"         = "Econ. right group",
  "galtan_att"                      = "TAN attitudes (cont.)",
  "I(galtan_att^2)"                 = "TAN attitudes^2",
  "att_socialben"                   = "Anti-redistribution attitude (cont.)",
  "age"                             = "Age",
  "gender_fmwomen"                  = "Woman",
  "edumedium education"             = "Medium education (ref. low)",
  "eduhigh education"               = "High education (ref. low)",
  "inc_hh"                          = "Household-equivalized income",
  "migrationbyes"                   = "With migration background",
  "urbansuburb/small city"          = "Suburbs/small city (ref. country)",
  "urbanbig city"                   = "Big city (ref. country)"
)

# Table columns (names are the column headers): additive and interaction
# model for each of the two outcomes.
models <- list(
  "Natives losing 1" = model_white3,
  "Natives losing 2" = model_white4,
  "Men losing 1"     = model_men3,
  "Men losing 2"     = model_men4
)

modelsummary(
  models,
  coef_map = cm,
  stars = TRUE,
  # output = "latex",
  title = "Predictors of mentioning natives/men as losers, by mentioning migrants/women as winners",
  notes = "Binomial generalized linear model. Standard errors in parentheses, conf. level: 0.95."
)
```

